1 /*
2    Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #define DBLQH_C
26 #include "Dblqh.hpp"
27 #include <ndb_limits.h>
28 #include <md5_hash.hpp>
29 
30 #include <ndb_version.h>
31 #include <signaldata/AccKeyReq.hpp>
32 #include <signaldata/NodeRecoveryStatusRep.hpp>
33 #include <signaldata/TuxBound.hpp>
34 #include <signaldata/AccScan.hpp>
35 #include <signaldata/CopyActive.hpp>
36 #include <signaldata/CopyFrag.hpp>
37 #include <signaldata/CreateTrigImpl.hpp>
38 #include <signaldata/DropTrigImpl.hpp>
39 #include <signaldata/EventReport.hpp>
40 #include <signaldata/ExecFragReq.hpp>
41 #include <signaldata/GCP.hpp>
42 #include <signaldata/TcKeyRef.hpp>
43 #include <signaldata/LqhKey.hpp>
44 #include <signaldata/NextScan.hpp>
45 #include <signaldata/NFCompleteRep.hpp>
46 #include <signaldata/NodeFailRep.hpp>
47 #include <signaldata/ReadNodesConf.hpp>
48 #include <signaldata/RelTabMem.hpp>
49 #include <signaldata/ScanFrag.hpp>
50 #include <signaldata/SrFragidConf.hpp>
51 #include <signaldata/StartFragReq.hpp>
52 #include <signaldata/StartRec.hpp>
53 #include <signaldata/TupKey.hpp>
54 #include <signaldata/TupCommit.hpp>
55 #include <signaldata/LqhFrag.hpp>
56 #include <signaldata/AccFrag.hpp>
57 #include <signaldata/TupFrag.hpp>
58 #include <signaldata/DumpStateOrd.hpp>
59 #include <signaldata/PackedSignal.hpp>
60 #include <signaldata/LqhTransReq.hpp>
61 
62 #include <signaldata/CreateTab.hpp>
63 #include <signaldata/CreateTable.hpp>
64 #include <signaldata/PrepDropTab.hpp>
65 #include <signaldata/DropTab.hpp>
66 #include <signaldata/DropTable.hpp>
67 
68 #include <signaldata/AlterTab.hpp>
69 #include <signaldata/AlterTable.hpp>
70 #include <signaldata/DictTabInfo.hpp>
71 
72 #include <signaldata/LCP.hpp>
73 #include <DebuggerNames.hpp>
74 #include <signaldata/BackupImpl.hpp>
75 #include <signaldata/RestoreImpl.hpp>
76 #include <signaldata/KeyInfo.hpp>
77 #include <signaldata/AttrInfo.hpp>
78 #include <signaldata/TransIdAI.hpp>
79 #include <KeyDescriptor.hpp>
80 #include <signaldata/RouteOrd.hpp>
81 #include <signaldata/FsRef.hpp>
82 #include <SectionReader.hpp>
83 #include <signaldata/SignalDroppedRep.hpp>
84 #include <signaldata/FsReadWriteReq.hpp>
85 #include <signaldata/DbinfoScan.hpp>
86 #include <signaldata/SystemError.hpp>
87 #include <signaldata/FireTrigOrd.hpp>
88 #include <signaldata/IsolateOrd.hpp>
89 #include <signaldata/LocalSysfile.hpp>
90 #include <signaldata/UndoLogLevel.hpp>
91 #include <NdbEnv.h>
92 #include <Checksum.hpp>
93 
94 #include "../suma/Suma.hpp"
95 #include "DblqhCommon.hpp"
96 
97 /**
98  * overload handling...
99  * TODO: cleanup...from all sorts of perspective
100  */
101 #include <TransporterRegistry.hpp>
102 
103 #include <EventLogger.hpp>
104 extern EventLogger * g_eventLogger;
105 
/**
 * Compile-time debug switches.
 *
 * Each switch below is commented out by default. Removing the leading
 * "//" defines the switch, which in turn makes the matching DEB_xxx macro
 * further down log through g_eventLogger->info(). The list is only
 * compiled when VM_TRACE or ERROR_INSERT is defined.
 */
#if (defined(VM_TRACE) || defined(ERROR_INSERT))
//#define ABORT_TRACE 1
//#define DO_TRANSIENT_POOL_STAT 1
//#define DEBUG_EXTRA_LCP 1
//#define DEBUG_LCP 1
//#define DEBUG_LCP_RESTORE
//#define DEBUG_COPY 1
//#define DEBUG_GCP 1
//#define DEBUG_CUT_REDO 1
//#define DEBUG_LOCAL_LCP 1
//#define DEBUG_LOCAL_LCP_EXTRA 1
//#define DEBUG_REDO_FLAG 1
//#define DEBUG_TRANSACTION_TIMEOUT 1
//#define DEBUG_SCHEMA_VERSION 1
//#define DEBUG_EARLY_LCP 1
#endif

/**
 * DEB_xxx(arglist)
 *
 * arglist is a complete, parenthesised printf-style argument list, so a
 * call site uses double parentheses:
 *
 *   DEB_LCP(("(%u)Start phase 9 wait started", instance()));
 *
 * When the controlling DEBUG_xxx switch is not defined the macro expands
 * to an empty statement and arglist is not evaluated.
 */
#ifdef DEBUG_EARLY_LCP
#define DEB_EARLY_LCP(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_EARLY_LCP(arglist) do { } while (0)
#endif

#ifdef DEBUG_SCHEMA_VERSION
#define DEB_SCHEMA_VERSION(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_SCHEMA_VERSION(arglist) do { } while (0)
#endif

#ifdef DEBUG_EXTRA_LCP
#define DEB_EXTRA_LCP(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_EXTRA_LCP(arglist) do { } while (0)
#endif

#ifdef DEBUG_LCP
#define DEB_LCP(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_LCP(arglist) do { } while (0)
#endif

#ifdef DEBUG_LCP_RESTORE
#define DEB_LCP_RESTORE(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_LCP_RESTORE(arglist) do { } while (0)
#endif

#ifdef DEBUG_COPY
#define DEB_COPY(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_COPY(arglist) do { } while (0)
#endif

#ifdef DEBUG_GCP
#define DEB_GCP(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_GCP(arglist) do { } while (0)
#endif

#ifdef DEBUG_CUT_REDO
#define DEB_CUT_REDO(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_CUT_REDO(arglist) do { } while (0)
#endif

#ifdef DEBUG_LOCAL_LCP
#define DEB_LOCAL_LCP(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_LOCAL_LCP(arglist) do { } while (0)
#endif

#ifdef DEBUG_LOCAL_LCP_EXTRA
#define DEB_LOCAL_LCP_EXTRA(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_LOCAL_LCP_EXTRA(arglist) do { } while (0)
#endif

/* Note: the switch is DEBUG_REDO_FLAG, the macro is DEB_REDO. */
#ifdef DEBUG_REDO_FLAG
#define DEB_REDO(arglist) do { g_eventLogger->info arglist ; } while (0)
#else
#define DEB_REDO(arglist) do { } while (0)
#endif
189 
// Use LQH_DEBUG to print messages that should be
// seen only when we debug the product
//#define USE_LQH_DEBUG
#ifdef USE_LQH_DEBUG
// NOTE: the expansion already ends with ';'.
#define LQH_DEBUG(x) ndbout << "DBLQH: "<< x << endl;

/**
 * Stream printers used by LQH_DEBUG. The state enums are written as
 * their numeric value; Operation_t is written as a short name.
 */
static
NdbOut &
operator<<(NdbOut& out, Dblqh::TcConnectionrec::TransactionState state)
{
  out << static_cast<int>(state);
  return out;
}

static
NdbOut &
operator<<(NdbOut& out, Dblqh::TcConnectionrec::LogWriteState state)
{
  out << static_cast<int>(state);
  return out;
}

static
NdbOut &
operator<<(NdbOut& out, Dblqh::TcConnectionrec::AbortState state)
{
  out << static_cast<int>(state);
  return out;
}

static
NdbOut &
operator<<(NdbOut& out, Dblqh::ScanRecord::ScanState state)
{
  out << static_cast<int>(state);
  return out;
}

#ifdef DEB_TRANSACTION_TIMEOUT
static
NdbOut &
operator<<(NdbOut& out, Dblqh::LogFileOperationRecord::LfoState state)
{
  out << static_cast<int>(state);
  return out;
}
#endif
static
NdbOut &
operator<<(NdbOut& out, Dblqh::ScanRecord::ScanType state)
{
  out << static_cast<int>(state);
  return out;
}

/* Values not listed in the switch print nothing. */
static
NdbOut &
operator<<(NdbOut& out, Operation_t op)
{
  switch (op) {
  case ZREAD:    out << "READ";    break;
  case ZREAD_EX: out << "READ-EX"; break;
  case ZINSERT:  out << "INSERT";  break;
  case ZUPDATE:  out << "UPDATE";  break;
  case ZDELETE:  out << "DELETE";  break;
  case ZWRITE:   out << "WRITE";   break;
  case ZUNLOCK:  out << "UNLOCK";  break;
  case ZREFRESH: out << "REFRESH"; break;
  }
  return out;
}

#else
#define LQH_DEBUG(x)
#endif
258 
259 //#define MARKER_TRACE 0
260 //#define TRACE_SCAN_TAKEOVER 1
261 
262 #ifdef VM_TRACE
263 #ifndef NDB_DEBUG_REDO
264 #define NDB_DEBUG_REDO
265 #endif
266 #endif
267 
268 #ifdef NDB_DEBUG_REDO
269 static int DEBUG_REDO = 0;
270 #else
271 #define DEBUG_REDO 0
272 #endif
273 
274 #define DELAY_CHECK_SYSTEM_SCANS 10000
275 /**
276  * System reserved scan ids. Scan ids 0-11 are specific for ACC scans.
277  * Scan ids from 12 and up to a maximum of 133 (configurable) are used
278  * for TUX range scans and finally scan ids of from last TUX range scan
279  * id up to a maximum of 252 is used for TUP full table scans. Scan ids
280  * 253, 254 and 255 are reserved for LCP scans, Backup scans and NR scans.
281  */
282 const Uint32 NR_ScanNo = 253;
283 const Uint32 LCP_ScanNo = 254;
284 const Uint32 Backup_ScanNo = 255;
285 
286 #ifndef NDBD_TRACENR
287 #if defined VM_TRACE
288 #define NDBD_TRACENR
289 #endif
290 #endif
291 
292 #ifdef NDBD_TRACENR
293 #include <NdbConfig.h>
294 static NdbOut * tracenrout = 0;
295 static int TRACENR_FLAG = 0;
296 #define TRACENR(x) (* tracenrout) << x
297 #define SET_TRACENR_FLAG TRACENR_FLAG = 1
298 #define CLEAR_TRACENR_FLAG TRACENR_FLAG = 0
299 #else
300 #define TRACENR_FLAG 0
301 #define TRACENR(x) do { } while(0)
302 #define SET_TRACENR_FLAG
303 #define CLEAR_TRACENR_FLAG
304 #endif
305 
306 #define JAM_FILE_ID 451
307 
308 
309 #ifdef NDBD_TRACENR
310 static NdbOut * traceopout = 0;
311 #define TRACE_OP(regTcPtr, place) do { if (TRACE_OP_CHECK(regTcPtr)) TRACE_OP_DUMP(regTcPtr, place); } while(0)
312 #else
313 #define TRACE_OP(x, y) { (void)x;}
314 #endif
315 
/**
 * A position expressed as (file number, mbyte within that file).
 */
struct LogPosition
{
  Uint32 m_file_no;
  Uint32 m_mbyte;
};

/**
 * Three-way comparison of two positions, ordered first by m_file_no and
 * then by m_mbyte.
 *
 * @return 1 if pos1 > pos2, -1 if pos1 < pos2, 0 if they are equal.
 */
int
cmp(const LogPosition& pos1, const LogPosition& pos2)
{
  if (pos1.m_file_no != pos2.m_file_no)
    return (pos1.m_file_no > pos2.m_file_no) ? 1 : -1;

  if (pos1.m_mbyte != pos2.m_mbyte)
    return (pos1.m_mbyte > pos2.m_mbyte) ? 1 : -1;

  return 0;
}

/**
 * Distance from head forward to tail in a circular sequence of
 * cnt files with size mbytes each (i.e. total - (head - tail)).
 *
 * Both positions are first flattened to an absolute mbyte offset using
 * 64-bit arithmetic. When head is at or after tail the distance wraps
 * around the end; in particular head == tail yields cnt * size.
 *
 * @param head  position of the head
 * @param tail  position of the tail
 * @param cnt   number of files
 * @param size  number of mbytes per file
 * @return distance in mbytes (presumably the free part of the log)
 */
static
Uint64
free_log(const LogPosition& head, const LogPosition& tail,
         Uint32 cnt, Uint32 size)
{
  const Uint64 mbPerFile = size;
  const Uint64 headmb = head.m_file_no * mbPerFile + head.m_mbyte;
  const Uint64 tailmb = tail.m_file_no * mbPerFile + tail.m_mbyte;

  if (headmb >= tailmb)
  {
    return (cnt * mbPerFile) - headmb + tailmb;
  }
  return tailmb - headmb;
}
356 
357 /* ------------------------------------------------------------------------- */
358 /* -------               SEND SYSTEM ERROR                           ------- */
359 /*                                                                           */
360 /* ------------------------------------------------------------------------- */
systemError(Signal * signal,int line)361 void Dblqh::systemError(Signal* signal, int line)
362 {
363   signal->theData[0] = DumpStateOrd::LqhSystemError;
364   execDUMP_STATE_ORD(signal);
365   progError(line, NDBD_EXIT_NDBREQUIRE);
366 }//Dblqh::systemError()
367 
368 /* *************** */
369 /*  ACCSEIZEREF  > */
370 /* *************** */
execACCSEIZEREF(Signal * signal)371 void Dblqh::execACCSEIZEREF(Signal* signal)
372 {
373   jamEntry();
374   ndbabort();
375 }//Dblqh::execACCSEIZEREF()
376 
377 /* ******************************************************>> */
378 /* THIS SIGNAL IS USED TO HANDLE REAL-TIME                  */
379 /* BREAKS THAT ARE NECESSARY TO ENSURE REAL-TIME            */
380 /* OPERATION OF LQH.                                        */
381 /* This signal is also used for signal loops, for example   */
382 /* the timeout handling for writing logs every second.      */
383 /* ******************************************************>> */
execCONTINUEB(Signal * signal)384 void Dblqh::execCONTINUEB(Signal* signal)
385 {
386   jamEntry();
387   Uint32 tcase = signal->theData[0];
388   Uint32 data0 = signal->theData[1];
389   Uint32 data1 = signal->theData[2];
390   Uint32 data2 = signal->theData[3];
391   LogPartRecordPtr save;
392   TcConnectionrecPtr tcConnectptr;
393   switch (tcase) {
394   case ZPGMAN_PREP_LCP_ACTIVE_CHECK:
395   {
396     if (data1 == 0)
397     {
398       jam();
399       check_pgman_prep_lcp_active_prep_drop_tab(signal, data0);
400     }
401     else
402     {
403       jam();
404       check_pgman_prep_lcp_active_drop_tab(signal, data0);
405     }
406     return;
407   }
408   case ZLQH_SHRINK_TRANSIENT_POOLS:
409   {
410     jam();
411     Uint32 pool_index = signal->theData[1];
412     ndbassert(signal->getLength() == 2);
413     shrinkTransientPools(pool_index);
414     return;
415   }
416 #if (defined(VM_TRACE) || \
417      defined(ERROR_INSERT)) && \
418     defined(DO_TRANSIENT_POOL_STAT)
419 
420   case ZLQH_TRANSIENT_POOL_STAT:
421   {
422     for (Uint32 pool_index = 0;
423          pool_index < c_transient_pool_count;
424          pool_index++)
425     {
426       g_eventLogger->info(
427         "DBLQH %u: Transient slot pool %u %p: Entry size %u:"
428        " Free %u: Used %u: Used high %u: Size %u: For shrink %u",
429        instance(),
430        pool_index,
431        c_transient_pools[pool_index],
432        c_transient_pools[pool_index]->getEntrySize(),
433        c_transient_pools[pool_index]->getNoOfFree(),
434        c_transient_pools[pool_index]->getUsed(),
435        c_transient_pools[pool_index]->getUsedHi(),
436        c_transient_pools[pool_index]->getSize(),
437        c_transient_pools_shrinking.get(pool_index));
438     }
439     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 5000, 1);
440     break;
441   }
442 #endif
443 
444   case ZSTART_QUEUED_SCAN:
445   {
446     jamDebug();
447     restart_queued_scan(signal, data0);
448     return;
449   }
450   case ZCHECK_SYSTEM_SCANS:
451   {
452     handle_check_system_scans(signal);
453     signal->theData[0] = ZCHECK_SYSTEM_SCANS;
454     sendSignalWithDelay(cownref, GSN_CONTINUEB, signal,
455                         DELAY_CHECK_SYSTEM_SCANS, 1);
456     break;
457   }
458   case ZLOG_LQHKEYREQ:
459     if (cnoOfLogPages == 0) {
460       jam();
461   busywait:
462       sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 10, 2);
463       return;
464     }//if
465     logPartPtr.i = data0;
466     ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
467     save = logPartPtr;
468 
469     logPartPtr.p->LogLqhKeyReqSent = ZFALSE;
470 
471     if (logPartPtr.p->waitWriteGciLog == LogPartRecord::WWGL_TRUE)
472     {
473       jam();
474       goto startnext;
475     }
476     if (logPartPtr.p->m_log_complete_queue.isEmpty())
477     {
478       jam();
479       if (logPartPtr.p->m_log_prepare_queue.isEmpty())
480       {
481         /**
482          * We have already removed all entries from both queues (this can
483          * happen if aborts arrive and remove entries from the prepare
484          * queue). We stop checking the log queues until they fill up
485          * again.
486          */
487         jam();
488         return;
489       }
490       /**
491        * prepare is first in queue...check that it's ok to rock'n'roll
492        */
493       if (logPartPtr.p->m_log_problems != 0 ||
494           ERROR_INSERTED(5083))
495       {
496         /**
497          * It will be restarted when problems are cleared...
498          */
499         jam();
500         return;
501       }
502 
503       if (cnoOfLogPages < ZMIN_LOG_PAGES_OPERATION)
504       {
505         jam();
506         logPartPtr.p->LogLqhKeyReqSent = ZTRUE;
507         goto busywait;
508       }
509     }
510 
511     logFilePtr.i = logPartPtr.p->currentLogfile;
512     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
513     logPagePtr.i = logFilePtr.p->currentLogpage;
514     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
515 
516     getFirstInLogQueue(signal, tcConnectptr);
517     fragptr.i = tcConnectptr.p->fragmentptr;
518     c_fragment_pool.getPtr(fragptr);
519 
520     // so that operation can continue...
521     ndbrequire(logPartPtr.p->logPartState == LogPartRecord::ACTIVE);
522     logPartPtr.p->logPartState = LogPartRecord::IDLE;
523     switch (tcConnectptr.p->transactionState) {
524     case TcConnectionrec::LOG_QUEUED:
525       if (tcConnectptr.p->abortState != TcConnectionrec::ABORT_IDLE)
526       {
527         jam();
528         abortCommonLab(signal, tcConnectptr);
529       }
530       else
531       {
532         jam();
533         logLqhkeyreqLab(signal, tcConnectptr);
534       }
535       break;
536     case TcConnectionrec::LOG_ABORT_QUEUED:
537       jam();
538       writeAbortLog(signal, tcConnectptr.p, logPartPtr.p);
539       removeLogTcrec(signal, tcConnectptr);
540       continueAfterLogAbortWriteLab(signal, tcConnectptr);
541       break;
542     case TcConnectionrec::LOG_COMMIT_QUEUED:
543     case TcConnectionrec::LOG_COMMIT_QUEUED_WAIT_SIGNAL:
544       jam();
545       writeCommitLog(signal, logPartPtr, tcConnectptr.p);
546       if (tcConnectptr.p->transactionState == TcConnectionrec::LOG_COMMIT_QUEUED) {
547         if (tcConnectptr.p->seqNoReplica == 0 ||
548 	    tcConnectptr.p->activeCreat == Fragrecord::AC_NR_COPY)
549         {
550           jam();
551           localCommitLab(signal, tcConnectptr);
552         }
553         else
554         {
555           jam();
556           commitReplyLab(signal, tcConnectptr.p);
557         }
558       }
559       else
560       {
561         jam();
562         tcConnectptr.p->transactionState = TcConnectionrec::LOG_COMMIT_WRITTEN_WAIT_SIGNAL;
563       }
564       break;
565     case TcConnectionrec::COMMIT_QUEUED:
566       jam();
567       localCommitLab(signal, tcConnectptr);
568       break;
569     case TcConnectionrec::ABORT_QUEUED:
570       jam();
571       abortCommonLab(signal, tcConnectptr);
572       break;
573     default:
574       ndbabort();
575     }//switch
576     /**
577      * LogFile/LogPage could have altered due to above
578      */
579   startnext:
580     logPartPtr = save;
581     logFilePtr.i = logPartPtr.p->currentLogfile;
582     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
583     logPagePtr.i = logFilePtr.p->currentLogpage;
584     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
585     logNextStart(signal);
586     return;
587     break;
588   case ZSR_GCI_LIMITS:
589     jam();
590     signal->theData[0] = data0;
591     srGciLimits(signal);
592     return;
593     break;
594   case ZSR_LOG_LIMITS:
595     jam();
596     signal->theData[0] = data0;
597     signal->theData[1] = data1;
598     signal->theData[2] = data2;
599     srLogLimits(signal);
600     return;
601     break;
602   case ZSEND_EXEC_CONF:
603     jam();
604     signal->theData[0] = data0;
605     sendExecConf(signal);
606     return;
607     break;
608   case ZEXEC_SR:
609     jam();
610     signal->theData[0] = data0;
611     execSr(signal);
612     return;
613     break;
614   case ZSR_FOURTH_COMP:
615     jam();
616     signal->theData[0] = data0;
617     srFourthComp(signal);
618     return;
619     break;
620   case ZINIT_FOURTH:
621     jam();
622     signal->theData[0] = data0;
623     initFourth(signal);
624     return;
625     break;
626   case ZTIME_SUPERVISION:
627     jam();
628     signal->theData[0] = data0;
629     timeSup(signal);
630     return;
631     break;
632   case ZSR_PHASE3_START:
633     jam();
634     srPhase3Start(signal);
635     return;
636     break;
637   case ZLQH_TRANS_NEXT:
638   {
639     jam();
640     TcNodeFailRecordPtr tcNodeFailPtr;
641     tcNodeFailPtr.i = data0;
642     ptrCheckGuard(tcNodeFailPtr, ctcNodeFailrecFileSize, tcNodeFailRecord);
643     lqhTransNextLab(signal, tcNodeFailPtr);
644     return;
645     break;
646   }
647   case ZSCAN_TC_CONNECT:
648     jam();
649     tabptr.i = data1;
650     ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
651     scanTcConnectLab(signal, data0, data2);
652     return;
653     break;
654   case ZINITIALISE_RECORDS:
655     jam();
656     initialiseRecordsLab(signal, data0, data2, signal->theData[4]);
657     return;
658     break;
659   case ZINIT_GCP_REC:
660     jam();
661     gcpPtr.i = 0;
662     ptrAss(gcpPtr, gcpRecord);
663     initGcpRecLab(signal);
664     startTimeSupervision(signal);
665     return;
666     break;
667   case ZCHECK_LCP_STOP_BLOCKED:
668   {
669     jam();
670     checkLcpStopBlockedLab(signal, data0);
671     return;
672   }
673   case ZSCAN_MARKERS:
674     jam();
675     scanMarkers(signal, data0, data1);
676     return;
677     break;
678 
679   case ZOPERATION_EVENT_REP:
680     jam();
681     /* Send counter event report */
682     {
683       const Uint32 len = c_Counters.build_event_rep(signal);
684       sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, len, JBB);
685     }
686 
687     {
688       const Uint32 report_interval = 5000;
689       const Uint32 len = c_Counters.build_continueB(signal);
690       signal->theData[0] = ZOPERATION_EVENT_REP;
691       sendSignalWithDelay(cownref, GSN_CONTINUEB, signal,
692                           report_interval, len);
693     }
694     break;
695   case ZDROP_TABLE_WAIT_USAGE:
696     jam();
697     dropTab_wait_usage(signal);
698     return;
699     break;
700   case ZENABLE_EXPAND_CHECK:
701   {
702     jam();
703     fragptr.i = signal->theData[1];
704     if (fragptr.i != RNIL)
705     {
706       jam();
707       c_lcp_complete_fragments.getPtr(fragptr);
708       Ptr<Fragrecord> save = fragptr;
709 
710       c_lcp_complete_fragments.next(fragptr);
711       signal->theData[0] = ZENABLE_EXPAND_CHECK;
712       signal->theData[1] = fragptr.i;
713       sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
714 
715       c_lcp_complete_fragments.remove(save);
716       return;
717     }
718     else
719     {
720       jam();
721       cstartRecReq = SRR_REDO_COMPLETE;
722       ndbrequire(c_lcp_complete_fragments.isEmpty());
723 
724       rebuildOrderedIndexes(signal, 0);
725       return;
726     }
727   }
728   case ZRETRY_TCKEYREF:
729   {
730     jam();
731     Uint32 cnt = signal->theData[1];
732     Uint32 ref = signal->theData[2];
733     if (cnt < (10 * 60 * 5))
734     {
735       jam();
736       /**
737        * Only retry for 5 minutes...then hope that API has handled it..somehow
738        */
739       memmove(signal->theData, signal->theData+3, 4*TcKeyRef::SignalLength);
740       sendTCKEYREF(signal, ref, 0, cnt);
741     }
742     return;
743   }
744   case ZWAIT_REORG_SUMA_FILTER_ENABLED:
745     jam();
746     wait_reorg_suma_filter_enabled(signal);
747     return;
748   case ZREBUILD_ORDERED_INDEXES:
749   {
750     Uint32 tableId = signal->theData[1];
751     rebuildOrderedIndexes(signal, tableId);
752     return;
753   }
754   case ZWAIT_READONLY:
755   {
756     jam();
757     wait_readonly(signal);
758     return;
759   }
760   case ZLCP_FRAG_WATCHDOG:
761   {
762     jam();
763     checkLcpFragWatchdog(signal);
764     return;
765   }
766   case ZSTART_LOCAL_LCP:
767   {
768     jam();
769     start_lcp_on_table(signal);
770     return;
771   }
772   default:
773 
774 #if defined ERROR_INSERT
775     // ERROR_INSERT 5090
776     Uint32 compact = signal->theData[0];
777     if (compact >> 16 == ZDELAY_FS_OPEN)
778     {
779       jam();
780       // Remove ZDELAY_FS_OPEN from compacted theData[0] and
781       // restore logFilePtr.i as it was in the original FSOPENCONF signal
782       signal->theData[0] = (Uint16)compact;
783       sendSignalWithDelay(cownref, GSN_FSOPENCONF, signal, 10, 2);
784       return;
785     }
786 #endif
787 
788     ndbabort();
789   }//switch
790 }//Dblqh::execCONTINUEB()
791 
792 /* *********************************************************> */
793 /*  Request from DBDIH to include a new node in the node list */
794 /*  and so forth.                                             */
795 /* *********************************************************> */
execINCL_NODEREQ(Signal * signal)796 void Dblqh::execINCL_NODEREQ(Signal* signal)
797 {
798   jamEntry();
799   BlockReference retRef = signal->theData[0];
800   Uint32 nodeId = signal->theData[1];
801   cnewestGci = signal->theData[2];
802   cnewestCompletedGci = signal->theData[2] - 1;
803   ndbrequire(cnoOfNodes < MAX_NDB_NODES);
804   for (Uint32 i = 0; i < cnoOfNodes; i++) {
805     jam();
806     if (cnodeData[i] == nodeId) {
807       jam();
808       cnodeStatus[i] = ZNODE_UP;
809     }//if
810   }//for
811 
812   {
813     HostRecordPtr Thostptr;
814     Thostptr.i = nodeId;
815     ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
816     Thostptr.p->nodestatus = ZNODE_UP;
817   }
818 
819   signal->theData[0] = nodeId;
820   signal->theData[1] = cownref;
821   sendSignal(retRef, GSN_INCL_NODECONF, signal, 2, JBB);
822   return;
823 }//Dblqh::execINCL_NODEREQ()
824 
execTUPSEIZEREF(Signal * signal)825 void Dblqh::execTUPSEIZEREF(Signal* signal)
826 {
827   jamEntry();
828   ndbabort();
829 }//Dblqh::execTUPSEIZEREF()
830 
831 bool
is_first_instance()832 Dblqh::is_first_instance()
833 {
834   if (!isNdbMtLqh() || instance() == 1)
835     return true;
836   return false;
837 }
838 
839 /* ########################################################################## */
840 /* #######                  START / RESTART MODULE                    ####### */
841 /* ########################################################################## */
842 /* ************************************************************************>> */
843 /*  This is first signal that arrives in a start / restart. Sender is NDBCNTR_REF. */
844 /* ************************************************************************>> */
execSTTOR(Signal * signal)845 void Dblqh::execSTTOR(Signal* signal)
846 {
847   UintR tstartPhase;
848 
849   jamEntry();
850                                                   /* START CASE */
851   tstartPhase = signal->theData[1];
852                                                   /* SYSTEM RESTART RANK */
853   csignalKey = signal->theData[6];
854 #if defined NDBD_TRACENR
855   FILE *out = 0;
856   char *name;
857 #endif
858   switch (tstartPhase) {
859   case ZSTART_PHASE1:
860     jam();
861     cstartPhase = tstartPhase;
862     c_tup = (Dbtup*)globalData.getBlock(DBTUP, instance());
863     c_tux = (Dbtux*)globalData.getBlock(DBTUX, instance());
864     c_acc = (Dbacc*)globalData.getBlock(DBACC, instance());
865     c_pgman = (Pgman*)globalData.getBlock(PGMAN, instance());
866     c_backup = (Backup*)globalData.getBlock(BACKUP, instance());
867     c_restore = (Restore*)globalData.getBlock(RESTORE, instance());
868     c_lgman = (Lgman*)globalData.getBlock(LGMAN);
869     ndbrequire(c_tup != 0 &&
870                c_tux != 0 &&
871                c_acc != 0 &&
872                c_pgman != 0 &&
873                c_lgman != 0 &&
874                c_restore != 0);
875 
876 #ifdef NDBD_TRACENR
877 #ifdef VM_TRACE
878     out = globalSignalLoggers.getOutputStream();
879 #endif
880     if (out == 0) {
881       name = NdbConfig_SignalLogFileName(getOwnNodeId());
882       out = fopen(name, "a");
883     }
884     tracenrout = new NdbOut(* new FileOutputStream(out));
885 #endif
886 
887 #ifdef NDBD_TRACENR
888     traceopout = &ndbout;
889 #endif
890 
891 #ifdef NDB_DEBUG_REDO
892     {
893       char buf[100];
894       if (NdbEnv_GetEnv("NDB_DEBUG_REDO", buf, sizeof(buf)))
895       {
896         DEBUG_REDO = 1;
897       }
898     }
899 #endif
900     sendsttorryLab(signal);
901     return;
902   case 3:
903     jam();
904 #if (defined(VM_TRACE) || \
905      defined(ERROR_INSERT)) && \
906     defined(DO_TRANSIENT_POOL_STAT)
907 
908     /* Start reporting statistics for transient pools */
909     signal->theData[0] = ZLQH_TRANSIENT_POOL_STAT;
910     sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
911 #endif
912     send_read_local_sysfile(signal);
913     return;
914   case 4:
915     jam();
916     define_backup(signal);
917     break;
918   case 6:
919     c_elapsed_time_millis = 0;
920     init_elapsed_time(signal, c_latestTIME_SIGNAL);
921     sendsttorryLab(signal);
922     break;
923   case 9:
924     jam();
925     /**
926      * We add this wait phase to avoid having to handle multiple
927      * writers of the Local sysfile. We check here if we have an
928      * outstanding WRITE_LOCAL_SYSFILE_REQ signal, if that is the
929      * case we set a flag that we are waiting for this and send
930      * STTORRY when this is returned.
931      *
932      * WRITE_LOCAL_SYSFILE_REQ is only sent from first instance, so
933      * need to handle this for other instances.
934      */
935     ndbrequire(cstartRecReq == SRR_FIRST_LCP_DONE);
936     if (is_first_instance())
937     {
938       c_start_phase_9_waiting = true;
939       /**
940        * Restart is completed, we need to wait until this has been
941        * reflected in the local sysfile. It becomes reflected in
942        * local sysfile in the next processing of GCP_SAVEREQ. This
943        * avoids complex interaction handling of writes to the
944        * local sysfile.
945        */
946       DEB_LCP(("(%u)Start phase 9 wait started", instance()));
947     }
948     else
949     {
950       jam();
951       /**
952        * Restart is done, record this fact and move on in restart
953        * processing.
954        */
955       write_local_sysfile_restart_complete_done(signal);
956     }
957     return;
958   default:
959     jam();
960     /*empty*/;
961     sendsttorryLab(signal);
962     return;
963     break;
964   }//switch
965 }//Dblqh::execSTTOR()
966 
967 void
write_local_sysfile_restart_complete_done(Signal * signal)968 Dblqh::write_local_sysfile_restart_complete_done(Signal *signal)
969 {
970   cstartPhase = ZNIL;
971   cstartType = ZNIL;
972   c_start_phase_9_waiting = false;
973   sendsttorryLab(signal);
974 }
975 
976 void
send_read_local_sysfile(Signal * signal)977 Dblqh::send_read_local_sysfile(Signal *signal)
978 {
979   ReadLocalSysfileReq *req = (ReadLocalSysfileReq*)signal->getDataPtrSend();
980   req->userPointer = 0;
981   req->userReference = reference();
982   sendSignal(NDBCNTR_REF, GSN_READ_LOCAL_SYSFILE_REQ, signal,
983              ReadLocalSysfileReq::SignalLength, JBB);
984 }
985 
986 void
execREAD_LOCAL_SYSFILE_CONF(Signal * signal)987 Dblqh::execREAD_LOCAL_SYSFILE_CONF(Signal *signal)
988 {
989   ReadLocalSysfileConf *conf = (ReadLocalSysfileConf*)signal->getDataPtr();
990   c_local_sysfile.m_node_restorable_on_its_own =
991     conf->nodeRestorableOnItsOwn;
992   c_local_sysfile.m_max_gci_restorable = conf->maxGCIRestorable;
993   c_backup->setRestorableGci(c_local_sysfile.m_max_gci_restorable);
994   sendsttorryLab(signal);
995 }
996 
997 
998 void
define_backup(Signal * signal)999 Dblqh::define_backup(Signal* signal)
1000 {
1001   DefineBackupReq * req = (DefineBackupReq*)signal->getDataPtrSend();
1002   req->backupId = 0;
1003   req->clientRef = 0;
1004   req->clientData = 0;
1005   req->senderRef = reference();
1006   req->masterRef = reference();
1007   req->backupPtr = 0;
1008   req->backupKey[0] = 0;
1009   req->backupKey[1] = 0;
1010   req->backupDataLen = ~0;
1011 
1012   NdbNodeBitmask nodes;
1013   nodes.set(getOwnNodeId());
1014 
1015   BlockReference backupRef = calcInstanceBlockRef(BACKUP);
1016   Uint32 packed_length = nodes.getPackedLengthInWords();
1017 
1018   // Backup is not allowed for mixed versions of data nodes
1019   ndbrequire(ndbd_send_node_bitmask_in_section(getNodeInfo(refToNode(backupRef)).m_version));
1020 
1021   LinearSectionPtr lsptr[3];
1022   lsptr[0].p = nodes.rep.data;
1023   lsptr[0].sz = packed_length;
1024   sendSignal(backupRef, GSN_DEFINE_BACKUP_REQ, signal,
1025        DefineBackupReq::SignalLength_v1, JBB, lsptr, 1);
1026 }
1027 
1028 void
execDEFINE_BACKUP_REF(Signal * signal)1029 Dblqh::execDEFINE_BACKUP_REF(Signal* signal)
1030 {
1031   jamEntry();
1032   m_backup_ptr = RNIL;
1033   DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtrSend();
1034   int err_code = 0;
1035   char * extra_msg = NULL;
1036 
1037   switch(ref->errorCode){
1038     case DefineBackupRef::Undefined:
1039     case DefineBackupRef::FailedToSetupFsBuffers:
1040     case DefineBackupRef::FailedToAllocateBuffers:
1041     case DefineBackupRef::FailedToAllocateTables:
1042     case DefineBackupRef::FailedAllocateTableMem:
1043     case DefineBackupRef::FailedToAllocateFileRecord:
1044     case DefineBackupRef::FailedToAllocateAttributeRecord:
1045     case DefineBackupRef::FailedInsertFileHeader:
1046     case DefineBackupRef::FailedInsertTableList:
1047       jam();
1048       err_code = NDBD_EXIT_INVALID_CONFIG;
1049       extra_msg = (char*) "Probably Backup parameters configuration error, Please consult the manual";
1050       progError(__LINE__, err_code, extra_msg);
1051   }
1052 
1053   sendsttorryLab(signal);
1054 }
1055 
1056 void
execDEFINE_BACKUP_CONF(Signal * signal)1057 Dblqh::execDEFINE_BACKUP_CONF(Signal* signal)
1058 {
1059   jamEntry();
1060   DefineBackupConf * conf = (DefineBackupConf*)signal->getDataPtrSend();
1061   m_backup_ptr = conf->backupPtr;
1062   sendsttorryLab(signal);
1063 }
1064 
1065 /* ***************************************> */
1066 /*  Restart phases 1 - 6, sender is Ndbcntr */
1067 /* ***************************************> */
execNDB_STTOR(Signal * signal)1068 void Dblqh::execNDB_STTOR(Signal* signal)
1069 {
1070   jamEntry();
1071   Uint32 ownNodeId = signal->theData[1];   /* START PHASE*/
1072   cstartPhase = signal->theData[2];  /* MY NODE ID */
1073   cstartType = signal->theData[3];   /* START TYPE */
1074 
1075   switch (cstartPhase) {
1076   case ZSTART_PHASE1:
1077     jam();
1078     /* Which bits in request info should 'pass through' replicas */
1079     preComputedRequestInfoMask = 0;
1080     // Dont setDisableFkconstraints - handled on primary
1081     LqhKeyReq::setNoTriggersFlag(preComputedRequestInfoMask, 1);
1082     LqhKeyReq::setUtilFlag(preComputedRequestInfoMask, 1);
1083     // Dont setNoWaitFlag - handled on primary
1084     LqhKeyReq::setLastReplicaNo(preComputedRequestInfoMask, LqhKeyReq::RI_LAST_REPL_MASK);
1085     // Dont LqhKeyReq::setApplicationAddressFlag
1086     LqhKeyReq::setDirtyFlag(preComputedRequestInfoMask, 1);
1087     // Dont LqhKeyReq::setInterpretedFlag
1088     LqhKeyReq::setSimpleFlag(preComputedRequestInfoMask, 1);
1089     LqhKeyReq::setOperation(preComputedRequestInfoMask, LqhKeyReq::RI_OPERATION_MASK);
1090     LqhKeyReq::setGCIFlag(preComputedRequestInfoMask, 1);
1091     LqhKeyReq::setNrCopyFlag(preComputedRequestInfoMask, 1);
1092     // Dont setAIInLqhKeyReq
1093     // Dont setSeqNoReplica
1094     // Dont setSameClientAndTcFlag
1095     // Dont setReturnedReadLenAIFlag
1096     LqhKeyReq::setMarkerFlag(preComputedRequestInfoMask, 1);
1097     LqhKeyReq::setQueueOnRedoProblemFlag(preComputedRequestInfoMask, 1);
1098     //preComputedRequestInfoMask = 0x003d7fff;
1099     startphase1Lab(signal, /* dummy */ ~0, ownNodeId);
1100 
1101     {
1102       /* Start counter activity event reporting. */
1103       const Uint32 len = c_Counters.build_continueB(signal);
1104       signal->theData[0] = ZOPERATION_EVENT_REP;
1105       sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 10, len);
1106     }
1107     return;
1108     break;
1109   case ZSTART_PHASE2:
1110     jam();
1111     startphase2Lab(signal, /* dummy */ ~0);
1112     return;
1113     break;
1114   case ZSTART_PHASE3:
1115     jam();
1116     startphase3Lab(signal);
1117     return;
1118     break;
1119   case ZSTART_PHASE4:
1120     jam();
1121     /*empty*/;
1122     sendNdbSttorryLab(signal);
1123     return;
1124     break;
1125   case ZSTART_PHASE6:
1126     jam();
1127     startphase6Lab(signal);
1128     return;
1129     break;
1130   default:
1131     jam();
1132     /*empty*/;
1133     sendNdbSttorryLab(signal);
1134     return;
1135     break;
1136   }//switch
1137 }//Dblqh::execNDB_STTOR()
1138 
1139 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
/* +++++++                         START PHASE 1                    +++++++ */
1141 /*                                                                          */
1142 /*             INITIATE ALL RECORDS WITHIN THE BLOCK                        */
1143 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
/**
 * Start phase 1 work: initialise block references, host records and the
 * REDO log part / log file records.
 *
 * @param signal     Signal object, passed on to the helpers called here.
 * @param _dummy     Not used in this function.
 * @param ownNodeId  Node id of this node, stored in cownNodeid.
 *
 * For an initial start or initial node restart, initialisation of the REDO
 * log files is started through openLogfileInit() and progress is reported
 * through reportStatus(). For other start types the phase is acknowledged
 * immediately with sendNdbSttorryLab().
 *
 * NOTE(review): the loop below relies on the block-level cursors logPartPtr
 * and logFilePtr; logFilePtr is presumably set by seizeLogfile() - confirm
 * before reordering anything here.
 */
void Dblqh::startphase1Lab(Signal* signal, Uint32 _dummy, Uint32 ownNodeId)
{
  UintR Ti, Tj;
  HostRecordPtr ThostPtr;

/* ------- INITIATE ALL RECORDS ------- */
  cownNodeid    = ownNodeId;
  caccBlockref  = calcInstanceBlockRef(DBACC);
  ctupBlockref  = calcInstanceBlockRef(DBTUP);
  ctuxBlockref  = calcInstanceBlockRef(DBTUX);
  cownref       = calcInstanceBlockRef(DBLQH);

  /* Reset every slot of the scan-pointer check bookkeeping. */
  for (Uint32 i = 0; i <= ZCOPY_FRAGREQ_CHECK_INDEX; i++)
  {
    c_check_scanptr_i[i] = RNIL;
    c_check_scanptr_save_line[i] = __LINE__;
    c_check_scanptr_save_timer[i] = 0;
  }
  ndbassert(cownref == reference());
  /* Reset every host record: empty pack buffers and node marked as down. */
  for (Ti = 0; Ti < chostFileSize; Ti++) {
    ThostPtr.i = Ti;
    ptrCheckGuard(ThostPtr, chostFileSize, hostRecord);
    /*
     * Valid only if receiver has same number of LQH workers.
     * In general full instance key of fragment must be used.
     */
    ThostPtr.p->inPackedList = false;
    for (Tj = 0; Tj < NDB_ARRAY_SIZE(ThostPtr.p->lqh_pack); Tj++)
    {
      ThostPtr.p->lqh_pack[Tj].noOfPackedWords = 0;
      ThostPtr.p->lqh_pack[Tj].hostBlockRef =
        numberToRef(DBLQH, Tj, ThostPtr.i);
    }
    for (Tj = 0; Tj < NDB_ARRAY_SIZE(ThostPtr.p->tc_pack); Tj++)
    {
      ThostPtr.p->tc_pack[Tj].noOfPackedWords = 0;
      ThostPtr.p->tc_pack[Tj].hostBlockRef =
        numberToRef(DBTC, Tj, ThostPtr.i);
    }
    ThostPtr.p->nodestatus = ZNODE_DOWN;
  }//for
  cpackedListIndex = 0;

  /* The REDO log files are only created for the two initial start types. */
  bool do_init =
    (cstartType == NodeState::ST_INITIAL_START) ||
    (cstartType == NodeState::ST_INITIAL_NODE_RESTART);

  LogFileRecordPtr prevLogFilePtr;
  LogFileRecordPtr zeroLogFilePtr;

  if (do_init)
  {
    g_eventLogger->info("LDM(%u): Starting REDO log initialisation",
                        instance());
  }
  /* At least one file per log part is needed so that zeroLogFilePtr is set. */
  ndbrequire(cnoLogFiles != 0);
  for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++)
  {
    jam();
    ptrAss(logPartPtr, logPartRecord);
    initLogpart(signal);
    for (Uint32 fileNo = 0; fileNo < cnoLogFiles; fileNo++)
    {
      seizeLogfile(signal);
      if (fileNo != 0)
      {
        jam();
        /* Link this file after the previously seized file. */
        prevLogFilePtr.p->nextLogFile = logFilePtr.i;
        logFilePtr.p->prevLogFile = prevLogFilePtr.i;
      }
      else
      {
        jam();
        /* File zero is both the first and the current file of the part. */
        logPartPtr.p->firstLogfile = logFilePtr.i;
        logPartPtr.p->currentLogfile = logFilePtr.i;
        zeroLogFilePtr.i = logFilePtr.i;
        zeroLogFilePtr.p = logFilePtr.p;
      }//if
      prevLogFilePtr.i = logFilePtr.i;
      prevLogFilePtr.p = logFilePtr.p;
      initLogfile(signal, fileNo);
      if (do_init)
      {
        jam();
        if (logFilePtr.i == zeroLogFilePtr.i)
        {
          jam();
/* ------------------------------------------------------------------------- */
/*IN AN INITIAL START WE START BY CREATING ALL LOG FILES AND SETTING THEIR   */
/*PROPER SIZE AND INITIALISING PAGE ZERO IN ALL FILES.                       */
/*WE START BY CREATING FILE ZERO IN EACH LOG PART AND THEN PROCEED           */
/*SEQUENTIALLY THROUGH ALL LOG FILES IN THE LOG PART.                        */
/* ------------------------------------------------------------------------- */
          if (m_use_om_init == 0 || logPartPtr.i == 0)
          {
            /**
             * initialize one file at a time if using OM_INIT
             */
            jam();
#if defined(USE_INIT_GLOBAL_VARIABLES)
            if (m_use_om_init)
            {
              jam();
              /**
               * FSWRITEREQ does cross-thread execute-direct
               *   which makes the clear_global_variables "unsafe"
               *   disable it until we're finished with init log-files
               */
              disable_global_variables();
            }
#endif
            openLogfileInit(signal);
          }
        }//if
      }//if
    }//for
    /* Close the ring: the last file of the part links back to file zero. */
    zeroLogFilePtr.p->prevLogFile = logFilePtr.i;
    logFilePtr.p->nextLogFile = zeroLogFilePtr.i;
  }

  initReportStatus(signal);
  if (!do_init)
  {
    jam();
    g_eventLogger->info("LDM(%u): Started LDM restart phase 1"
                        " (read REDO log page headers to init"
                        " REDO log data)",
                        instance());
    /* Nothing to create, so the phase can be acknowledged at once. */
    sendNdbSttorryLab(signal);
  }
  else
  {
    /* The phase is not acknowledged here in the initial start case. */
    reportStatus(signal);
  }

  return;
}//Dblqh::startphase1Lab()
1281 
1282 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
1283 /* +++++++                           START PHASE 2                    +++++++ */
1284 /*                                                                            */
1285 /* CONNECT LQH WITH ACC AND TUP.                                              */
1286 /* EVERY CONNECTION RECORD IN LQH IS ASSIGNED TO ONE ACC CONNECTION RECORD    */
1287 /*       AND ONE TUP CONNECTION RECORD.                                       */
1288 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
startphase2Lab(Signal * signal,Uint32 _dummy)1289 void Dblqh::startphase2Lab(Signal* signal, Uint32 _dummy)
1290 {
1291   cmaxWordsAtNodeRec = MAX_NO_WORDS_OUTSTANDING_COPY_FRAGMENT;
1292 /* -- ACC AND TUP CONNECTION PROCESS -- */
1293   TcConnectionrecPtr tcConnectptr;
1294   ndbrequire(tcConnect_pool.seize(tcConnectptr));
1295   ctcConnectReservedCount = 0;
1296   cfirstfreeTcConrec = RNIL;
1297   moreconnectionsLab(signal, tcConnectptr);
1298   signal->theData[0] = ZCHECK_SYSTEM_SCANS;
1299   sendSignalWithDelay(cownref, GSN_CONTINUEB, signal,
1300                       DELAY_CHECK_SYSTEM_SCANS, 1);
1301   return;
1302 }//Dblqh::startphase2Lab()
1303 
moreconnectionsLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)1304 void Dblqh::moreconnectionsLab(Signal* signal,
1305                                const TcConnectionrecPtr tcConnectptr)
1306 {
1307   // set TUX block here (no operation is seized in TUX)
1308 /* NO STATE CHECKING IS PERFORMED, ASSUMED TO WORK */
1309 /* *************** */
1310 /*  ACCSEIZEREQ  < */
1311 /* *************** */
1312   signal->theData[0] = tcConnectptr.i;
1313   signal->theData[1] = cownref;
1314   sendSignal(caccBlockref, GSN_ACCSEIZEREQ, signal, 2, JBB);
1315   return;
1316 }//Dblqh::moreconnectionsLab()
1317 
1318 /* ***************> */
1319 /*  ACCSEIZECONF  > */
1320 /* ***************> */
execACCSEIZECONF(Signal * signal)1321 void Dblqh::execACCSEIZECONF(Signal* signal)
1322 {
1323   jamEntry();
1324   TcConnectionrecPtr tcConnectptr;
1325   tcConnectptr.i = signal->theData[0];
1326   ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
1327   tcConnectptr.p->accConnectrec = signal->theData[1];
1328   tcConnectptr.p->accConnectPtrP =
1329     c_acc->get_operation_ptr(signal->theData[1]);
1330 
1331 /* *************** */
1332 /*  TUPSEIZEREQ  < */
1333 /* *************** */
1334   signal->theData[0] = tcConnectptr.i;
1335   signal->theData[1] = cownref;
1336   sendSignal(ctupBlockref, GSN_TUPSEIZEREQ, signal, 2, JBB);
1337   return;
1338 }//Dblqh::execACCSEIZECONF()
1339 
1340 /* ***************> */
1341 /*  TUPSEIZECONF  > */
1342 /* ***************> */
execTUPSEIZECONF(Signal * signal)1343 void Dblqh::execTUPSEIZECONF(Signal* signal)
1344 {
1345   jamEntry();
1346   TcConnectionrecPtr tcConnectptr;
1347   tcConnectptr.i = signal->theData[0];
1348   ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
1349   tcConnectptr.p->tupConnectrec = signal->theData[1];
1350   tcConnectptr.p->tupConnectPtrP =
1351     c_tup->get_operation_ptr(signal->theData[1]);
1352 /* ------- CHECK IF THERE ARE MORE CONNECTIONS TO BE CONNECTED ------- */
1353   Uint32 prevFirst = cfirstfreeTcConrec;
1354   tcConnectptr.p->nextTcConnectrec = prevFirst;
1355   cfirstfreeTcConrec = tcConnectptr.i;
1356   ctcConnectReservedCount++;
1357   if (ctcConnectReservedCount < ctcConnectReserved)
1358   {
1359     jam();
1360     ndbrequire(tcConnect_pool.seize(tcConnectptr));
1361     moreconnectionsLab(signal, tcConnectptr);
1362     return;
1363   }//if
1364 /* ALL LQH_CONNECT RECORDS ARE CONNECTED TO ACC AND TUP ---- */
1365   sendNdbSttorryLab(signal);
1366   return;
1367 }//Dblqh::execTUPSEIZECONF()
1368 
1369 /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
/* +++++++                    START PHASE 3                          +++++++ */
1371 /*                                                                           */
1372 /*       CONNECT LQH WITH LQH.                                               */
1373 /*       CONNECT EACH LQH WITH EVERY LQH IN THE DATABASE SYSTEM.             */
1374 /*       IF INITIAL START THEN CREATE THE FRAGMENT LOG FILES                 */
1375 /*IF SYSTEM RESTART OR NODE RESTART THEN OPEN THE FRAGMENT LOG FILES AND     */
1376 /*FIND THE END OF THE LOG FILES.                                             */
1377 /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
1378 /*        WAIT UNTIL ADD NODE PROCESSES ARE COMPLETED                        */
/*        IF INITIAL START ALSO WAIT FOR LOG FILES TO BE INITIALISED         */
1380 /*START TIME SUPERVISION OF LOG FILES. WE HAVE TO WRITE LOG PAGES TO DISK    */
1381 /*EVEN IF THE PAGES ARE NOT FULL TO ENSURE THAT THEY COME TO DISK ASAP.      */
1382 /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
startphase3Lab(Signal * signal)1383 void Dblqh::startphase3Lab(Signal* signal)
1384 {
1385   caddNodeState = ZTRUE;
1386 /* ***************<< */
1387 /*  READ_NODESREQ  < */
1388 /* ***************<< */
1389   cinitialStartOngoing = ZTRUE;
1390 
1391   switch(cstartType){
1392   case NodeState::ST_NODE_RESTART:
1393   case NodeState::ST_SYSTEM_RESTART:
1394     jam();
1395     for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++)
1396     {
1397       jam();
1398       LogFileRecordPtr locLogFilePtr;
1399       ptrAss(logPartPtr, logPartRecord);
1400       locLogFilePtr.i = logPartPtr.p->firstLogfile;
1401       ptrCheckGuard(locLogFilePtr, clogFileFileSize, logFileRecord);
1402       locLogFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_FRONTPAGE;
1403       openFileRw(signal, locLogFilePtr, false); /* No write buffering */
1404     }//for
1405     break;
1406   case NodeState::ST_INITIAL_START:
1407   case NodeState::ST_INITIAL_NODE_RESTART:
1408     jam();
1409     for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++)
1410     {
1411       jam();
1412       signal->theData[0] = ZINIT_FOURTH;
1413       signal->theData[1] = logPartPtr.i;
1414       sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
1415     }
1416     break;
1417   }
1418 
1419   signal->theData[0] = cownref;
1420   sendSignal(NDBCNTR_REF, GSN_READ_NODESREQ, signal, 1, JBB);
1421   return;
1422 }//Dblqh::startphase3Lab()
1423 
1424 /* ****************** */
1425 /*  READ_NODESCONF  > */
1426 /* ****************** */
execREAD_NODESCONF(Signal * signal)1427 void Dblqh::execREAD_NODESCONF(Signal* signal)
1428 {
1429   jamEntry();
1430 
1431   ReadNodesConf * const readNodes = (ReadNodesConf *)&signal->theData[0];
1432   cnoOfNodes = readNodes->noOfNodes;
1433 
1434   {
1435     ndbrequire(signal->getNoOfSections() == 1);
1436     SegmentedSectionPtr ptr;
1437     SectionHandle handle(this, signal);
1438     handle.getSection(ptr, 0);
1439     ndbrequire(ptr.sz == 5 * NdbNodeBitmask::Size);
1440     copy((Uint32*)&readNodes->definedNodes.rep.data, ptr);
1441     releaseSections(handle);
1442   }
1443 
1444   unsigned ind = 0;
1445   unsigned i = 0;
1446   for (i = 1; i < MAX_NDB_NODES; i++) {
1447     jam();
1448     if (readNodes->definedNodes.get(i))
1449     {
1450       jam();
1451       cnodeData[ind]    = i;
1452       cnodeStatus[ind]  = readNodes->inactiveNodes.get(i);
1453 
1454       {
1455         HostRecordPtr Thostptr;
1456         Thostptr.i = i;
1457         ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
1458         Thostptr.p->nodestatus = cnodeStatus[ind];
1459       }
1460 
1461       //readNodes->getVersionId(i, readNodes->theVersionIds) not used
1462       if (!readNodes->inactiveNodes.get(i))
1463       {
1464 	jam();
1465 	m_sr_nodes.set(i);
1466       }
1467       ind++;
1468     }//if
1469   }//for
1470   ndbrequire(ind == cnoOfNodes);
1471   ndbrequire(cnoOfNodes >= 1 && cnoOfNodes < MAX_NDB_NODES);
1472   ndbrequire(!(cnoOfNodes == 1 && cstartType == NodeState::ST_NODE_RESTART));
1473 
1474 #ifdef ERROR_INSERT
1475   c_master_node_id = readNodes->masterNodeId;
1476 #endif
1477 
1478   caddNodeState = ZFALSE;
1479   if (cstartType == NodeState::ST_SYSTEM_RESTART)
1480   {
1481     jam();
1482     sendNdbSttorryLab(signal);
1483     return;
1484   }
1485   else if (cstartType == NodeState::ST_NODE_RESTART)
1486   {
1487     jam();
1488     SET_TRACENR_FLAG;
1489     m_sr_nodes.clear();
1490     m_sr_nodes.set(getOwnNodeId());
1491     sendNdbSttorryLab(signal);
1492     return;
1493   }
1494   SET_TRACENR_FLAG;
1495 
1496   checkStartCompletedLab(signal);
1497   return;
1498 }//Dblqh::execREAD_NODESCONF()
1499 
checkStartCompletedLab(Signal * signal)1500 void Dblqh::checkStartCompletedLab(Signal* signal)
1501 {
1502   if (caddNodeState == ZFALSE) {
1503     if (cinitialStartOngoing == ZFALSE) {
1504       jam();
1505       sendNdbSttorryLab(signal);
1506       return;
1507     }//if
1508   }//if
1509   return;
1510 }//Dblqh::checkStartCompletedLab()
1511 
1512 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
1513 /* SET CONCURRENCY OF LOCAL CHECKPOINTS TO BE USED AFTER SYSTEM RESTART.      */
1514 /* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
startphase6Lab(Signal * signal)1515 void Dblqh::startphase6Lab(Signal* signal)
1516 {
1517   CLEAR_TRACENR_FLAG;
1518   sendNdbSttorryLab(signal);
1519   return;
1520 }//Dblqh::startphase6Lab()
1521 
sendNdbSttorryLab(Signal * signal)1522 void Dblqh::sendNdbSttorryLab(Signal* signal)
1523 {
1524   signal->theData[0] = cownref;
1525   BlockReference cntrRef = !isNdbMtLqh() ? NDBCNTR_REF : DBLQH_REF;
1526   sendSignal(cntrRef, GSN_NDB_STTORRY, signal, 1, JBB);
1527   return;
1528 }//Dblqh::sendNdbSttorryLab()
1529 
sendsttorryLab(Signal * signal)1530 void Dblqh::sendsttorryLab(Signal* signal)
1531 {
1532 /* *********<< */
1533 /*  STTORRY  < */
1534 /* *********<< */
1535   signal->theData[0] = csignalKey; /* SIGNAL KEY */
1536   signal->theData[1] = 3;          /* BLOCK CATEGORY */
1537   signal->theData[2] = 2;          /* SIGNAL VERSION NUMBER */
1538   signal->theData[3] = ZSTART_PHASE1;
1539   signal->theData[4] = 3;
1540   signal->theData[5] = 4;
1541   signal->theData[6] = 6;
1542   signal->theData[7] = 9;
1543   signal->theData[8] = 255;
1544   BlockReference cntrRef = !isNdbMtLqh() ? NDBCNTR_REF : DBLQH_REF;
1545   sendSignal(cntrRef, GSN_STTORRY, signal, 9, JBB);
1546   return;
1547 }//Dblqh::sendsttorryLab()
1548 
1549 /* ***************>> */
1550 /*  READ_NODESREF  > */
1551 /* ***************>> */
execREAD_NODESREF(Signal * signal)1552 void Dblqh::execREAD_NODESREF(Signal* signal)
1553 {
1554   jamEntry();
1555   ndbabort();
1556 }//Dblqh::execREAD_NODESREF()
1557 
1558 /* ****************** */
1559 /*  READ_CONFIG_REQ > */
1560 /* ****************** */
execREAD_CONFIG_REQ(Signal * signal)1561 void Dblqh::execREAD_CONFIG_REQ(Signal* signal)
1562 {
1563   const ReadConfigReq * req = (ReadConfigReq*)signal->getDataPtr();
1564   Uint32 ref = req->senderRef;
1565   Uint32 senderData = req->senderData;
1566   ndbrequire(req->noOfParameters == 0);
1567 
1568   jamEntry();
1569 
1570   const ndb_mgm_configuration_iterator * p =
1571     m_ctx.m_config.getOwnConfigIterator();
1572   ndbrequire(p != 0);
1573 
1574 
1575   /**
1576    * TODO move check of log-parts vs. ndbMtLqhWorkers to better place
1577    * (Configuration.cpp ??)
1578    */
1579   ndbrequire(globalData.ndbLogParts <= NDB_MAX_LOG_PARTS);
1580   if (globalData.ndbMtLqhWorkers > globalData.ndbLogParts)
1581   {
1582     char buf[255];
1583     BaseString::snprintf(buf, sizeof(buf),
1584       "Trying to start %d LQH workers with only %d log parts, try initial"
1585       " node restart to be able to use more LQH workers.",
1586       globalData.ndbMtLqhWorkers, globalData.ndbLogParts);
1587     progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
1588   }
1589 
1590   if (globalData.ndbLogParts != 4 &&
1591       globalData.ndbLogParts != 6 &&
1592       globalData.ndbLogParts != 8 &&
1593       globalData.ndbLogParts != 10 &&
1594       globalData.ndbLogParts != 12 &&
1595       globalData.ndbLogParts != 16 &&
1596       globalData.ndbLogParts != 20 &&
1597       globalData.ndbLogParts != 24 &&
1598       globalData.ndbLogParts != 32)
1599   {
1600     char buf[255];
1601     BaseString::snprintf(buf, sizeof(buf),
1602       "Trying to start with %d log parts, number of log parts can"
1603       " only be set to 4, 6, 8, 10, 12, 16, 20, 24 or 32.",
1604       globalData.ndbLogParts);
1605     progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
1606   }
1607 
1608   Uint32 redoLogHandlers = isNdbMtLqh() ? globalData.ndbMtLqhWorkers : 1;
1609   if ((redoLogHandlers * 4) < globalData.ndbLogParts)
1610   {
1611     char buf[255];
1612     BaseString::snprintf(buf, sizeof(buf),
1613       "Trying to start %d LQH workers with %d log parts, "
1614       "too many log parts per LQH (max 4 parts per LQH)",
1615        redoLogHandlers, globalData.ndbLogParts);
1616     progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
1617   }
1618 
1619   cnoLogFiles = 8;
1620   ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_NO_REDOLOG_FILES,
1621 					&cnoLogFiles));
1622   ndbrequire(cnoLogFiles > 0);
1623 
1624   Uint32 log_page_size= 0;
1625   ndb_mgm_get_int_parameter(p, CFG_DB_REDO_BUFFER,
1626 			    &log_page_size);
1627 
1628   c_max_scan_direct_count = ZMAX_SCAN_DIRECT_COUNT;
1629   ndb_mgm_get_int_parameter(p, CFG_DB_SCHED_SCAN_PRIORITY,
1630 			    &c_max_scan_direct_count);
1631 
1632   /**
1633    * Always set page size in half MBytes
1634    */
1635   clogPageFileSize= (log_page_size / sizeof(LogPageRecord));
1636   Uint32 mega_byte_part= clogPageFileSize & 15;
1637   if (mega_byte_part != 0) {
1638     jam();
1639     clogPageFileSize+= (16 - mega_byte_part);
1640   }
1641 
1642   /* maximum number of log file operations */
1643   clfoFileSize = clogPageFileSize;
1644   if (clfoFileSize < ZLFO_MIN_FILE_SIZE)
1645     clfoFileSize = ZLFO_MIN_FILE_SIZE;
1646 
1647   ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_TABLE, &ctabrecFileSize));
1648   clogFileFileSize = clogPartFileSize * cnoLogFiles;
1649 
1650   ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &c_diskless));
1651   c_o_direct = true;
1652   ndb_mgm_get_int_parameter(p, CFG_DB_O_DIRECT, &c_o_direct);
1653 
1654   m_use_om_init = 0;
1655   {
1656     const char * conf = 0;
1657     if (!ndb_mgm_get_string_parameter(p, CFG_DB_INIT_REDO, &conf) && conf)
1658     {
1659       jam();
1660       if (native_strcasecmp(conf, "sparse") == 0)
1661       {
1662         jam();
1663         m_use_om_init = 0;
1664       }
1665       else if (native_strcasecmp(conf, "full") == 0)
1666       {
1667         jam();
1668         m_use_om_init = 1;
1669       }
1670     }
1671   }
1672 
1673   c_o_direct_sync_flag = false;
1674   ndb_mgm_get_int_parameter(p,
1675                             CFG_DB_O_DIRECT_SYNC_FLAG,
1676                             &c_o_direct_sync_flag);
1677 #ifdef WIN32
1678   /**
1679    * Windows currently has no support for O_DIRECT and in
1680    * O_DIRECT_SYNC mode we optimise away needed FSYNCs
1681    * So avoid doing that by accident on Win
1682    */
1683   if (c_o_direct_sync_flag)
1684   {
1685     g_eventLogger->warning("ODirectSyncFlag not supported on Windows, ignored");
1686     c_o_direct_sync_flag = false;
1687   }
1688 else
1689   if (c_o_direct_sync_flag && m_use_om_init == 0)
1690   {
1691     g_eventLogger->warning("ODirectSyncFlag not supported"
1692                            "without setting InitFragmentLogFiles=full");
1693     c_o_direct_sync_flag = false;
1694   }
1695 #endif
1696 
1697   Uint32 tmp= 0;
1698   ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_LQH_FRAG, &tmp));
1699   c_fragment_pool.setSize(tmp);
1700 
1701   if (!ndb_mgm_get_int_parameter(p, CFG_DB_REDOLOG_FILE_SIZE,
1702                                  &clogFileSize))
1703   {
1704     // convert to mbyte
1705     clogFileSize = (clogFileSize + 1024*1024 - 1) / (1024 * 1024);
1706     ndbrequire(clogFileSize >= 4 && clogFileSize <= 1024);
1707   }
1708 
1709   m_startup_report_frequency = 0;
1710   ndb_mgm_get_int_parameter(p,CFG_DB_STARTUP_REPORT_FREQUENCY,
1711                             &m_startup_report_frequency);
1712   totalLogFiles = 4 * cnoLogFiles;
1713   totallogMBytes = totalLogFiles * clogFileSize;
1714 
1715   cmaxLogFilesInPageZero = (ZPAGE_SIZE - ZPAGE_HEADER_SIZE - 128) /
1716     (ZFD_MBYTE_SIZE * clogFileSize);
1717 
1718   /**
1719    * "Old" cmaxLogFilesInPageZero was 40
1720    * Each FD need 3 words per mb, require that they can fit into 1 page
1721    *   (atleast 1 FD)
1722    * Is also checked in ConfigInfo.cpp (max FragmentLogFileSize = 1Gb)
1723    *   1Gb = 1024Mb => 3(ZFD_MBYTE_SIZE) * 1024 < 8192 (ZPAGE_SIZE)
1724    */
1725   if (cmaxLogFilesInPageZero > 40)
1726   {
1727     jam();
1728     cmaxLogFilesInPageZero = 40;
1729   }
1730   else
1731   {
1732     ndbrequire(cmaxLogFilesInPageZero);
1733   }
1734 
1735 #if defined VM_TRACE || defined ERROR_INSERT
1736   if (cmaxLogFilesInPageZero_DUMP != 0)
1737   {
1738     ndbout << "LQH DUMP 2396 " << cmaxLogFilesInPageZero_DUMP;
1739     if (cmaxLogFilesInPageZero_DUMP > cmaxLogFilesInPageZero)
1740     {
1741       ndbout << ": max allowed is " << cmaxLogFilesInPageZero << endl;
1742       // do not continue with useless test
1743       ndbabort();
1744     }
1745     cmaxLogFilesInPageZero = cmaxLogFilesInPageZero_DUMP;
1746     ndbout << endl;
1747   }
1748 #endif
1749 
1750   /* How many file's worth of info is actually valid? */
1751   cmaxValidLogFilesInPageZero = cmaxLogFilesInPageZero - 1;
1752 
1753   /* Must be at least 1 */
1754   ndbrequire(cmaxValidLogFilesInPageZero > 0);
1755 
1756    {
1757     Uint32 config_val = 20;
1758     ndb_mgm_get_int_parameter(p, CFG_DB_LCP_INTERVAL, &config_val);
1759     config_val = config_val > 31 ? 31 : config_val;
1760 
1761     const Uint32 mb = 1024 * 1024;
1762 
1763     // perform LCP after this amout of mbytes written
1764     const Uint64 config_mbytes = ((Uint64(4) << config_val) + mb - 1) / mb;
1765     const Uint64 totalmb = Uint64(cnoLogFiles) * Uint64(clogFileSize);
1766     if (totalmb > config_mbytes)
1767     {
1768       c_free_mb_force_lcp_limit = Uint32(totalmb - config_mbytes);
1769     }
1770     else
1771     {
1772       c_free_mb_force_lcp_limit = 0;
1773     }
1774 
1775     // No less than 33%
1776     Uint32 limit = Uint32(totalmb / 3);
1777     if (c_free_mb_force_lcp_limit < limit)
1778     {
1779       c_free_mb_force_lcp_limit = limit;
1780     }
1781   }
1782   c_free_mb_tail_problem_limit = 4;  // If less than 4Mb set TAIL_PROBLEM
1783 
1784   ndb_mgm_get_int_parameter(p, CFG_DB_TRANSACTION_DEADLOCK_TIMEOUT,
1785                             &cTransactionDeadlockDetectionTimeout);
1786 
1787   initRecords(p);
1788   initialiseRecordsLab(signal, 0, ref, senderData);
1789 
1790   c_max_redo_lag = 30;
1791   ndb_mgm_get_int_parameter(p, CFG_DB_REDO_OVERCOMMIT_LIMIT,
1792                             &c_max_redo_lag);
1793 
1794   c_max_redo_lag_counter = 3;
1795   ndb_mgm_get_int_parameter(p, CFG_DB_REDO_OVERCOMMIT_COUNTER,
1796                             &c_max_redo_lag_counter);
1797 
1798   c_max_parallel_scans_per_frag = 32;
1799   ndb_mgm_get_int_parameter(p, CFG_DB_PARALLEL_SCANS_PER_FRAG,
1800                             &c_max_parallel_scans_per_frag);
1801 
1802   if (c_max_parallel_scans_per_frag > (256 - MAX_PARALLEL_SCANS_PER_FRAG) / 2)
1803   {
1804     jam();
1805     c_max_parallel_scans_per_frag = (256 - MAX_PARALLEL_SCANS_PER_FRAG) / 2;
1806   }
1807 
1808   {
1809     Uint32 param = 60;
1810     ndb_mgm_get_int_parameter(p, CFG_DB_LCP_SCAN_WATCHDOG_LIMIT,
1811                               &param);
1812 
1813     /* LCP fail when LCP_SCAN_WATCHDOG_LIMIT exceeded */
1814     param *= 1000;  // Convert to milliseconds
1815     c_lcpFragWatchdog.MaxElapsedWithNoProgressMillis = param;
1816 
1817     /* Warn when stalled for roughly 1/3 time, */
1818     c_lcpFragWatchdog.WarnElapsedWithNoProgressMillis = (param + 2)/3;
1819 
1820     ndbrequire(c_lcpFragWatchdog.MaxElapsedWithNoProgressMillis >=
1821                c_lcpFragWatchdog.WarnElapsedWithNoProgressMillis);
1822 
1823     /* Dump LCPFragWatchdog parameter values */
1824     signal->theData[0] = 2395;
1825     execDUMP_STATE_ORD(signal);
1826    }
1827 
1828   return;
1829 }
1830 
1831 /* ########################################################################## */
1832 /* #######                          ADD/DELETE FRAGMENT MODULE        ####### */
1833 /*       THIS MODULE IS USED BY DICTIONARY TO CREATE NEW FRAGMENTS AND DELETE */
1834 /*       OLD FRAGMENTS.                                                       */
1835 /*                                                                            */
1836 /* ########################################################################## */
1837 /* -------------------------------------------------------------- */
1838 /*            FRAG REQ                                            */
1839 /* -------------------------------------------------------------- */
1840 /* *********************************************************> */
1841 /*  LQHFRAGREQ: Create new fragments for a table. Sender DICT */
1842 /* *********************************************************> */
1843 
1844 // this unbelievable mess could be replaced by one signal to LQH
1845 // and execute direct to local DICT to get everything at once
1846 void
execCREATE_TAB_REQ(Signal * signal)1847 Dblqh::execCREATE_TAB_REQ(Signal* signal)
1848 {
1849   CreateTabReq* req = (CreateTabReq*)signal->getDataPtr();
1850   tabptr.i = req->tableId;
1851   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
1852 
1853   Uint32 senderRef = req->senderRef;
1854   Uint32 senderData = req->senderData;
1855 
1856   if (tabptr.p->tableStatus != Tablerec::NOT_DEFINED)
1857   {
1858     jam();
1859     CreateTabRef* ref = (CreateTabRef*)signal->getDataPtrSend();
1860     ref->senderData = senderData;
1861     ref->senderRef = reference();
1862     ref->errorCode = CreateTableRef::TableAlreadyExist;
1863     sendSignal(senderRef, GSN_CREATE_TAB_REF, signal,
1864                CreateTabRef::SignalLength, JBB);
1865     return;
1866   }
1867 
1868   seizeAddfragrec(signal);
1869   addfragptr.p->m_createTabReq = *req;
1870   req = &addfragptr.p->m_createTabReq;
1871 
1872   DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = ADD_TABLE_ONGOING",
1873                       instance(),
1874                       tabptr.i));
1875   tabptr.p->tableStatus = Tablerec::ADD_TABLE_ONGOING;
1876   tabptr.p->tableType = req->tableType;
1877   tabptr.p->m_addfragptr_i = RNIL;
1878   tabptr.p->primaryTableId = (req->primaryTableId == RNIL ? tabptr.i :
1879                               req->primaryTableId);
1880   tabptr.p->schemaVersion = req->tableVersion;
1881   DEB_SCHEMA_VERSION(("(%u)tab(%u): %u tableStatus = ADD_TABLE_ONGOING",
1882                       instance(),
1883                       tabptr.p->schemaVersion,
1884                       tabptr.i));
1885   tabptr.p->m_disk_table= 0;
1886 
1887   addfragptr.p->addfragStatus = AddFragRecord::WAIT_TUP;
1888   sendCreateTabReq(signal, addfragptr);
1889 }
1890 
1891 void
sendCreateTabReq(Signal * signal,AddFragRecordPtr addfragptr)1892 Dblqh::sendCreateTabReq(Signal* signal, AddFragRecordPtr addfragptr)
1893 {
1894   TablerecPtr tabPtr;
1895   tabPtr.i = addfragptr.p->m_createTabReq.tableId;
1896   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
1897 
1898   CreateTabReq* req = (CreateTabReq*)signal->getDataPtrSend();
1899   * req = addfragptr.p->m_createTabReq;
1900 
1901   req->senderRef = reference();
1902   req->senderData = addfragptr.i;
1903 
1904   Uint32 ref = calcInstanceBlockRef(DBTUP);
1905   switch(addfragptr.p->addfragStatus){
1906   case AddFragRecord::WAIT_TUP:
1907     if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType))
1908     {
1909       jam();
1910       req->noOfAttributes = 1;
1911       req->noOfKeyAttr = 1;
1912       req->noOfNullAttributes = 0;
1913     }
1914     break;
1915   case AddFragRecord::WAIT_TUX:
1916     jam();
1917     ndbrequire(req->noOfAttributes >= 2);
1918     req->noOfAttributes--;
1919     ref = calcInstanceBlockRef(DBTUX);
1920     break;
1921   default:
1922     jamLine(addfragptr.p->addfragStatus);
1923     ndbabort();
1924   }
1925 
1926   sendSignal(ref, GSN_CREATE_TAB_REQ, signal,
1927              CreateTabReq::SignalLengthLDM, JBB);
1928 }
1929 
1930 void
execCREATE_TAB_REF(Signal * signal)1931 Dblqh::execCREATE_TAB_REF(Signal* signal)
1932 {
1933   jamEntry();
1934 
1935   CreateTabRef * ref = (CreateTabRef*)signal->getDataPtr();
1936   addfragptr.i = ref->senderData;
1937   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
1938 
1939   abortAddFragOps(signal);
1940 
1941   ref->senderRef = reference();
1942   ref->senderData = addfragptr.p->m_createTabReq.senderData;
1943   sendSignal(addfragptr.p->m_createTabReq.senderRef,
1944              GSN_CREATE_TAB_REF, signal, CreateTabConf::SignalLength, JBB);
1945 
1946 
1947   releaseAddfragrec(signal);
1948 }
1949 
1950 void
execCREATE_TAB_CONF(Signal * signal)1951 Dblqh::execCREATE_TAB_CONF(Signal* signal)
1952 {
1953   jamEntry();
1954   CreateTabConf* conf = (CreateTabConf*)signal->getDataPtr();
1955   addfragptr.i = conf->senderData;
1956   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
1957 
1958   TablerecPtr tabPtr;
1959   tabPtr.i = addfragptr.p->m_createTabReq.tableId;
1960   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
1961 
1962   switch(addfragptr.p->addfragStatus){
1963   case AddFragRecord::WAIT_TUP:
1964     jam();
1965     addfragptr.p->tupConnectptr = conf->tupConnectPtr;
1966     if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType))
1967     {
1968       jam();
1969       addfragptr.p->addfragStatus = AddFragRecord::WAIT_TUX;
1970       sendCreateTabReq(signal, addfragptr);
1971       return;
1972     }
1973     break;
1974   case AddFragRecord::WAIT_TUX:
1975     jam();
1976     addfragptr.p->tuxConnectptr = conf->tuxConnectPtr;
1977     break;
1978   default:
1979     jamLine(addfragptr.p->addfragStatus);
1980     ndbabort();
1981   }
1982 
1983   addfragptr.p->addfragStatus = AddFragRecord::WAIT_ADD_ATTR;
1984 
1985   conf->senderRef = reference();
1986   conf->senderData = addfragptr.p->m_createTabReq.senderData;
1987   conf->lqhConnectPtr = addfragptr.i;
1988   sendSignal(addfragptr.p->m_createTabReq.senderRef,
1989              GSN_CREATE_TAB_CONF, signal, CreateTabConf::SignalLength, JBB);
1990 }
1991 
1992 /* ************************************************************************> */
1993 /*  LQHADDATTRREQ: Request from DICT to create attributes for the new table. */
1994 /* ************************************************************************> */
execLQHADDATTREQ(Signal * signal)1995 void Dblqh::execLQHADDATTREQ(Signal* signal)
1996 {
1997   jamEntry();
1998   LqhAddAttrReq * req = (LqhAddAttrReq*)signal->getDataPtr();
1999 
2000   addfragptr.i = req->lqhFragPtr;
2001   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
2002 
2003   addfragptr.p->m_addAttrReq = * req;
2004 
2005   const Uint32 tnoOfAttr = req->noOfAttributes;
2006   const Uint32 numSections = signal->getNoOfSections();
2007   bool isLongReq= ( numSections != 0 );
2008   addfragptr.p->defValSectionI = RNIL;
2009   addfragptr.p->defValNextPos = 0;
2010 
2011   if (isLongReq)
2012   {
2013     SectionHandle handle(this, signal);
2014     SegmentedSectionPtr defValSection;
2015     handle.getSection(defValSection, LqhAddAttrReq::DEFAULT_VALUE_SECTION_NUM);
2016     addfragptr.p->defValSectionI = defValSection.i;
2017     addfragptr.p->defValNextPos = 0;
2018     //Don't free Section here. Section is freed after default values are trasfered to TUP
2019     handle.clear();
2020   }
2021 
2022   ndbrequire(addfragptr.p->addfragStatus == AddFragRecord::WAIT_ADD_ATTR);
2023   ndbrequire((tnoOfAttr != 0) && (tnoOfAttr <= LqhAddAttrReq::MAX_ATTRIBUTES));
2024   addfragptr.p->totalAttrReceived += tnoOfAttr;
2025   ndbrequire(addfragptr.p->totalAttrReceived <=
2026              addfragptr.p->m_createTabReq.noOfAttributes);
2027 
2028   addfragptr.p->attrReceived = tnoOfAttr;
2029 
2030   TablerecPtr tabPtr;
2031   tabPtr.i = addfragptr.p->m_createTabReq.tableId;
2032   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
2033 
2034   for (Uint32 i = 0; i < tnoOfAttr; i++)
2035   {
2036     if(AttributeDescriptor::getDiskBased(req->attributes[i].attrDescriptor))
2037     {
2038       jam();
2039       tabPtr.p->m_disk_table = 1;
2040     }
2041   }//for
2042 
2043   addfragptr.p->attrSentToTup = 0;
2044   addfragptr.p->addfragStatus = AddFragRecord::TUP_ATTR_WAIT;
2045   sendAddAttrReq(signal);
2046 }//Dblqh::execLQHADDATTREQ()
2047 
2048 /* *********************>> */
2049 /*  TUP_ADD_ATTCONF      > */
2050 /* *********************>> */
execTUP_ADD_ATTCONF(Signal * signal)2051 void Dblqh::execTUP_ADD_ATTCONF(Signal* signal)
2052 {
2053   jamEntry();
2054   addfragptr.i = signal->theData[0];
2055   // implies that operation was released on the other side
2056   const bool lastAttr = signal->theData[1];
2057   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
2058 
2059   tabptr.i = addfragptr.p->m_createTabReq.tableId;
2060   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
2061 
2062   Uint32 noOfAttr = addfragptr.p->m_createTabReq.noOfAttributes;
2063 
2064   switch (addfragptr.p->addfragStatus) {
2065   case AddFragRecord::TUP_ATTR_WAIT:
2066     if (DictTabInfo::isOrderedIndex(tabptr.p->tableType))
2067     {
2068       addfragptr.p->addfragStatus = AddFragRecord::TUX_ATTR_WAIT;
2069       sendAddAttrReq(signal);
2070       break;
2071     }
2072     goto done_with_attr;
2073     break;
2074   case AddFragRecord::TUX_ATTR_WAIT:
2075     jam();
2076     if (lastAttr)
2077       addfragptr.p->tuxConnectptr = RNIL;
2078     goto done_with_attr;
2079     break;
2080   done_with_attr:
2081     addfragptr.p->attrSentToTup = addfragptr.p->attrSentToTup + 1;
2082     ndbrequire(addfragptr.p->attrSentToTup <= addfragptr.p->attrReceived);
2083     ndbrequire(addfragptr.p->totalAttrReceived <= noOfAttr);
2084     if (addfragptr.p->attrSentToTup < addfragptr.p->attrReceived)
2085     {
2086       // more in this batch
2087       jam();
2088       addfragptr.p->addfragStatus = AddFragRecord::TUP_ATTR_WAIT;
2089       sendAddAttrReq(signal);
2090       return;
2091     }
2092 
2093     if (addfragptr.p->defValSectionI != RNIL)
2094     {
2095       releaseSection(addfragptr.p->defValSectionI);
2096       addfragptr.p->defValNextPos = 0;
2097       addfragptr.p->defValSectionI = RNIL;
2098     }
2099 
2100     { // Reply
2101       LqhAddAttrConf *const conf = (LqhAddAttrConf*)signal->getDataPtrSend();
2102       conf->senderData = addfragptr.p->m_addAttrReq.senderData;
2103       conf->senderAttrPtr = addfragptr.p->m_addAttrReq.senderAttrPtr;
2104       sendSignal(addfragptr.p->m_createTabReq.senderRef,
2105                  GSN_LQHADDATTCONF, signal, LqhAddAttrConf::SignalLength, JBB);
2106     }
2107     if (addfragptr.p->totalAttrReceived < noOfAttr)
2108     {
2109       jam();
2110       addfragptr.p->addfragStatus = AddFragRecord::WAIT_ADD_ATTR;
2111     }
2112     else
2113     {
2114       jam();
2115       releaseAddfragrec(signal);
2116     }
2117     break;
2118   default:
2119     ndbabort();
2120   }
2121 }
2122 
2123 /* **********************>> */
2124 /*  TUX_ADD_ATTRCONF      > */
2125 /* **********************>> */
/**
 * TUX_ADD_ATTRCONF is handled by the same code as TUP_ADD_ATTCONF:
 * the signal is passed unchanged to execTUP_ADD_ATTCONF(), which acts on
 * the addfragStatus of the add-fragment record (TUP_ATTR_WAIT or
 * TUX_ATTR_WAIT).
 */
void Dblqh::execTUX_ADD_ATTRCONF(Signal* signal)
{
  jamEntry();
  execTUP_ADD_ATTCONF(signal);
}//Dblqh::execTUX_ADD_ATTRCONF
2131 
2132 /* *********************> */
2133 /*  TUP_ADD_ATTREF      > */
2134 /* *********************> */
execTUP_ADD_ATTRREF(Signal * signal)2135 void Dblqh::execTUP_ADD_ATTRREF(Signal* signal)
2136 {
2137   jamEntry();
2138   addfragptr.i = signal->theData[0];
2139   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
2140   const Uint32 errorCode = terrorCode = signal->theData[1];
2141 
2142   abortAddFragOps(signal);
2143 
2144   // operation was released on the other side
2145   switch (addfragptr.p->addfragStatus) {
2146   case AddFragRecord::TUP_ATTR_WAIT:
2147     jam();
2148     break;
2149   case AddFragRecord::TUX_ATTR_WAIT:
2150     jam();
2151     break;
2152   default:
2153     ndbabort();
2154   }
2155 
2156   if (addfragptr.p->defValSectionI != RNIL)
2157   {
2158     releaseSection(addfragptr.p->defValSectionI);
2159     addfragptr.p->defValNextPos = 0;
2160     addfragptr.p->defValSectionI = RNIL;
2161   }
2162 
2163   const Uint32 Ref = addfragptr.p->m_createTabReq.senderRef;
2164   const Uint32 senderData = addfragptr.p->m_addAttrReq.senderData;
2165 
2166   releaseAddfragrec(signal);
2167 
2168   LqhAddAttrRef *const ref = (LqhAddAttrRef*)signal->getDataPtrSend();
2169   ref->senderData = senderData;
2170   ref->errorCode = errorCode;
2171   sendSignal(Ref, GSN_LQHADDATTREF, signal,
2172 	     LqhAddAttrRef::SignalLength, JBB);
2173 }//Dblqh::execTUP_ADD_ATTRREF()
2174 
2175 /* **********************> */
2176 /*  TUX_ADD_ATTRREF      > */
2177 /* **********************> */
/**
 * TUX_ADD_ATTRREF is handled by the same code as TUP_ADD_ATTRREF:
 * the signal is passed unchanged to execTUP_ADD_ATTRREF(), which accepts
 * both the TUP_ATTR_WAIT and the TUX_ATTR_WAIT state.
 */
void Dblqh::execTUX_ADD_ATTRREF(Signal* signal)
{
  jamEntry();
  execTUP_ADD_ATTRREF(signal);
}//Dblqh::execTUX_ADD_ATTRREF
2183 
2184 /*
2185  * Add attribute in TUP or TUX.  Called up to 4 times.
2186  */
2187 void
sendAddAttrReq(Signal * signal)2188 Dblqh::sendAddAttrReq(Signal* signal)
2189 {
2190   arrGuard(addfragptr.p->attrSentToTup, LqhAddAttrReq::MAX_ATTRIBUTES);
2191   LqhAddAttrReq::Entry& entry =
2192     addfragptr.p->m_addAttrReq.attributes[addfragptr.p->attrSentToTup];
2193 
2194   const Uint32 attrId = entry.attrId & 0xffff;
2195   const Uint32 primaryAttrId = entry.attrId >> 16;
2196 
2197   tabptr.i = addfragptr.p->m_createTabReq.tableId;
2198   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
2199 
2200   if (addfragptr.p->addfragStatus == AddFragRecord::TUP_ATTR_WAIT)
2201   {
2202     if (DictTabInfo::isTable(tabptr.p->tableType) ||
2203         DictTabInfo::isHashIndex(tabptr.p->tableType) ||
2204         (DictTabInfo::isOrderedIndex(tabptr.p->tableType) &&
2205          primaryAttrId == ZNIL)) {
2206       jam();
2207       TupAddAttrReq* const tupreq = (TupAddAttrReq*)signal->getDataPtrSend();
2208       tupreq->tupConnectPtr = addfragptr.p->tupConnectptr;
2209       tupreq->attrId = attrId;
2210       tupreq->attrDescriptor = entry.attrDescriptor;
2211       tupreq->extTypeInfo = entry.extTypeInfo;
2212       BlockReference tupRef = calcInstanceBlockRef(DBTUP);
2213 
2214       Uint32 sectionLen = 0;
2215       Uint32 startIndex = TupAddAttrReq::SignalLength;
2216       if (addfragptr.p->defValSectionI != RNIL)
2217       {
2218         SegmentedSectionPtr defValSection;
2219         getSection(defValSection, addfragptr.p->defValSectionI);
2220 
2221         SectionReader defValueReader(defValSection, getSectionSegmentPool());
2222 
2223         ndbrequire(defValueReader.step(addfragptr.p->defValNextPos));
2224 
2225         Uint32 defValueHeader;
2226         ndbrequire(defValueReader.peekWord(&defValueHeader));
2227 
2228         AttributeHeader ah(defValueHeader);
2229         Uint32 defValueLen = ah.getByteSize();
2230         Uint32 defValueWords = ((defValueLen +3)/4) + 1;
2231         Uint32 *dst = &signal->theData[startIndex];
2232         ndbrequire(defValueReader.getWords(dst, defValueWords));
2233         addfragptr.p->defValNextPos += defValueWords;
2234         sectionLen = defValueWords;
2235       }
2236 
2237       //A long section is attached when a default value is sent.
2238       if (sectionLen != 0)
2239       {
2240         LinearSectionPtr ptr[3];
2241         ptr[0].p= &signal->theData[startIndex];
2242         ptr[0].sz= sectionLen;
2243         sendSignal(tupRef, GSN_TUP_ADD_ATTRREQ,
2244                    signal, TupAddAttrReq::SignalLength, JBB, ptr, 1);
2245       }
2246       else
2247         sendSignal(tupRef, GSN_TUP_ADD_ATTRREQ,
2248                    signal, TupAddAttrReq::SignalLength, JBB);
2249 
2250       return;
2251     }
2252     if (DictTabInfo::isOrderedIndex(tabptr.p->tableType) &&
2253         primaryAttrId != ZNIL) {
2254       // this attribute is not for TUP
2255       jam();
2256       TupAddAttrConf* tupconf = (TupAddAttrConf*)signal->getDataPtrSend();
2257       tupconf->userPtr = addfragptr.i;
2258       tupconf->lastAttr = false;
2259       sendSignal(reference(), GSN_TUP_ADD_ATTCONF,
2260 		 signal, TupAddAttrConf::SignalLength, JBB);
2261       return;
2262     }
2263   }
2264 
2265   if (addfragptr.p->addfragStatus == AddFragRecord::TUX_ATTR_WAIT)
2266   {
2267     jam();
2268     if (DictTabInfo::isOrderedIndex(tabptr.p->tableType) &&
2269         primaryAttrId != ZNIL) {
2270       jam();
2271       TuxAddAttrReq* const tuxreq = (TuxAddAttrReq*)signal->getDataPtrSend();
2272       tuxreq->tuxConnectPtr = addfragptr.p->tuxConnectptr;
2273       tuxreq->notused1 = 0;
2274       tuxreq->attrId = attrId;
2275       tuxreq->attrDescriptor = entry.attrDescriptor;
2276       tuxreq->extTypeInfo = entry.extTypeInfo;
2277       tuxreq->primaryAttrId = primaryAttrId;
2278       BlockReference tuxRef = calcInstanceBlockRef(DBTUX);
2279       sendSignal(tuxRef, GSN_TUX_ADD_ATTRREQ,
2280 		 signal, TuxAddAttrReq::SignalLength, JBB);
2281       return;
2282     }
2283     if (DictTabInfo::isOrderedIndex(tabptr.p->tableType) &&
2284         primaryAttrId == ZNIL) {
2285       // this attribute is not for TUX
2286       jam();
2287       TuxAddAttrConf* tuxconf = (TuxAddAttrConf*)signal->getDataPtrSend();
2288       tuxconf->userPtr = addfragptr.i;
2289       tuxconf->lastAttr = false;
2290       sendSignal(reference(), GSN_TUX_ADD_ATTRCONF,
2291 		 signal, TuxAddAttrConf::SignalLength, JBB);
2292       return;
2293     }
2294   }
2295   ndbabort();
2296 }//Dblqh::sendAddAttrReq
2297 
2298 /**
2299  * Return the schemaVersion-part that changes when table is created.
2300  * This function can be called during restart from thread where
2301  * TSMAN belongs, so it is important to not use any block variables
2302  * here. The table object should not change here during this phase
2303  * since it is in a very specific restart phase.
2304  */
getCreateSchemaVersion(Uint32 tableId)2305 Uint32 Dblqh::getCreateSchemaVersion(Uint32 tableId)
2306 {
2307   TablerecPtr tabPtr;
2308   tabPtr.i = tableId;
2309   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
2310   Uint32 schemaVersion;
2311   if (tabPtr.p->tableStatus == Tablerec::TABLE_DEFINED ||
2312       tabPtr.p->tableStatus == Tablerec::TABLE_READ_ONLY)
2313   {
2314     schemaVersion = (tabPtr.p->schemaVersion & 0xFFFFFF);
2315   }
2316   else
2317   {
2318     schemaVersion = 0;
2319   }
2320 #ifdef DEBUG_SCHEMA_VERSION
2321   if (schemaVersion == 0)
2322   {
2323     g_eventLogger->info("(%u) table: %u, schemaVersion: %u, tableStatus: %u",
2324                         instance(),
2325                         tableId,
2326                         tabPtr.p->schemaVersion,
2327                         tabPtr.p->tableStatus);
2328   }
2329 #endif
2330   return schemaVersion;
2331 }
2332 
/**
 * LQHFRAGREQ: create a fragment of a table in this LQH instance.
 *
 * Steps, in order:
 *  - abort on the two legacy signal lengths,
 *  - take a local copy of the request and look up the table record,
 *  - refuse (via fragrefLab()) if the table is not in ADD_TABLE_ONGOING
 *    and the request has no add-fragment flag, if getFragmentrec()
 *    returns true for the fragment id, or if insertFragrec() fails,
 *  - initialise the fragment record (GCIs, table type, log part,
 *    instance key, usage statistics, primary table fragment),
 *  - seize an add-fragment record holding the request,
 *  - tables and hash indexes: send ACCFRAGREQ (state ACC_ADDFRAG),
 *    ordered indexes: continue with sendAddFragReq() (state WAIT_TUP).
 */
void Dblqh::execLQHFRAGREQ(Signal* signal)
{
  jamEntry();
  {
    LqhFragReq  *req = (LqhFragReq*)signal->getDataPtr();
    if (signal->length() == LqhFragReq::OldestSignalLength)
    {
      jam();
      ndbabort(); /* Not supported to upgrade from < 7.2 */
      // NOTE(review): the assignments below are the former upgrade code;
      // presumably never reached since ndbabort() comes first - confirm.
      /**
       * Upgrade support to specify partitionId
       */
      req->partitionId = req->fragmentId;
      /**
       * Upgrade support to specify createGci
       */
      req->createGci = 0;
    }
    if (signal->length() == LqhFragReq::OldSignalLength)
    {
      jam();
      ndbabort(); /* Not supported to upgrade from < 7.2 */
      /**
       * Upgrade support to specify createGci
       */
      req->createGci = 0;
    }
  }

  c_num_fragments_created_since_restart++;

  // Work on a copy of the request rather than on the signal data
  LqhFragReq copy = *(LqhFragReq*)signal->getDataPtr();
  LqhFragReq * req = &copy;

  tabptr.i = req->tableId;
  ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);

  // Fragments are only added while the table is being added, unless the
  // request carries the add-fragment flag in changeMask.
  if (tabptr.p->tableStatus != Tablerec::ADD_TABLE_ONGOING &&
      (AlterTableReq::getAddFragFlag(req->changeMask) == 0))
  {
    jam();
    fragrefLab(signal, ZTAB_STATE_ERROR, req);
    return;
  }//if

  // A true return from getFragmentrec() is treated as an error here
  if (getFragmentrec(signal, req->fragId))
  {
    jam();
    fragrefLab(signal, terrorCode, req);
    return;
  }//if

  if (!insertFragrec(signal, req->fragId))
  {
    jam();
    fragrefLab(signal, terrorCode, req);
    return;
  }//if

  // The two lowest bits of requestInfo are passed to initFragrec()
  Uint32 copyType = req->requestInfo & 3;
  bool tempTable = ((req->requestInfo & LqhFragReq::TemporaryTable) != 0);
  initFragrec(signal, tabptr.i, req->fragId, copyType);
  fragptr.p->createGci = req->createGci;
  fragptr.p->startGci = req->startGci;
  fragptr.p->newestGci = req->startGci;
  DEB_LCP(("(%u)LQHFRAGREQ: tab(%u,%u) createGci: %u, startGci: %u,"
           " newestGci: %u",
           instance(),
           tabptr.i,
           req->fragId,
           fragptr.p->createGci,
           fragptr.p->startGci,
           fragptr.p->newestGci));
  set_min_keep_gci(fragptr.p->createGci);

  // newestGci is never left below createGci
  if (fragptr.p->newestGci < req->createGci)
  {
    jam();
    fragptr.p->newestGci = req->createGci;
  }
  // The fragment record keeps the table type in 8 bits
  ndbrequire(tabptr.p->tableType < 256);
  fragptr.p->tableType = (Uint8)tabptr.p->tableType;

  {
    // Map the requested log part id to a log part record; the log part
    // must be owned by this instance.
    NdbLogPartInfo lpinfo(instance());
    Uint32 logPartNo = lpinfo.partNoFromId(req->logPartId);
    ndbrequire(lpinfo.partNoOwner(logPartNo));

    LogPartRecordPtr ptr;
    ptr.i = lpinfo.partNoIndex(logPartNo);
    ptrCheckGuard(ptr, clogPartFileSize, logPartRecord);
    ndbrequire(ptr.p->logPartNo == logPartNo);

    fragptr.p->m_log_part_ptr_i = ptr.i;
    fragptr.p->lqhInstanceKey = getInstanceKey(tabptr.i, req->fragId);
  }

  /* Init per-frag op counters */
  fragptr.p->m_useStat.init();

  if (DictTabInfo::isOrderedIndex(tabptr.p->tableType)) {
    jam();
    // find corresponding primary table fragment
    // (the primary table's fragment with the same fragment id; it must
    // already exist)
    TablerecPtr tTablePtr;
    tTablePtr.i = tabptr.p->primaryTableId;
    ptrCheckGuard(tTablePtr, ctabrecFileSize, tablerec);
    FragrecordPtr tFragPtr;
    tFragPtr.i = RNIL;
    for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tTablePtr.p->fragid); i++) {
      if (tTablePtr.p->fragid[i] == fragptr.p->fragId) {
        jam();
        tFragPtr.i = tTablePtr.p->fragrec[i];
        break;
      }
    }
    ndbrequire(tFragPtr.i != RNIL);
    // store it
    fragptr.p->tableFragptr = tFragPtr.i;
  }
  else
  {
    jam();
    // Not an ordered index: the fragment refers to itself
    fragptr.p->tableFragptr = fragptr.i;
  }

  if (tempTable)
  {
//--------------------------------------------
// The TemporaryTable flag in requestInfo means
// a temporary table without logging or
// checkpointing.
//--------------------------------------------
    jam();
    fragptr.p->logFlag = Fragrecord::STATE_FALSE;
    fragptr.p->lcpFlag = Fragrecord::LCP_STATE_FALSE;
  }//if

  // The add-fragment record carries the request until the fragment has
  // been set up in the other blocks.
  seizeAddfragrec(signal);
  addfragptr.p->m_lqhFragReq = * req;
  addfragptr.p->fragmentPtr = fragptr.i;

  if (DictTabInfo::isTable(tabptr.p->tableType) ||
      DictTabInfo::isHashIndex(tabptr.p->tableType)) {
    jam();
    AccFragReq* const accreq = (AccFragReq*)signal->getDataPtrSend();
    accreq->userPtr = addfragptr.i;
    accreq->userRef = cownref;
    accreq->tableId = tabptr.i;
    accreq->reqInfo = 0;
    accreq->fragId = req->fragId;
    accreq->localKeyLen = addfragptr.p->m_lqhFragReq.localKeyLength;
    accreq->maxLoadFactor = addfragptr.p->m_lqhFragReq.maxLoadFactor;
    accreq->minLoadFactor = addfragptr.p->m_lqhFragReq.minLoadFactor;
    accreq->kValue = addfragptr.p->m_lqhFragReq.kValue;
    accreq->lhFragBits = addfragptr.p->m_lqhFragReq.lh3DistrBits;
    accreq->lhDirBits = addfragptr.p->m_lqhFragReq.lh3PageBits;
    accreq->keyLength = addfragptr.p->m_lqhFragReq.keyLength;
    /* --------------------------------------------------------------------- */
    /* Send ACCFRAGREQ. When ACCFRAGCONF is received, execACCFRAGCONF()      */
    /* continues with the tuple fragment through sendAddFragReq().           */
    /* --------------------------------------------------------------------- */
    addfragptr.p->addfragStatus = AddFragRecord::ACC_ADDFRAG;
    sendSignal(fragptr.p->accBlockref, GSN_ACCFRAGREQ,
	       signal, AccFragReq::SignalLength, JBB);
    return;
  }
  if (DictTabInfo::isOrderedIndex(tabptr.p->tableType)) {
    jam();
    // Ordered indexes skip ACC and start directly with TUP
    addfragptr.p->addfragStatus = AddFragRecord::WAIT_TUP;
    sendAddFragReq(signal);
    return;
  }
  // Neither table, hash index nor ordered index
  ndbabort();
}//Dblqh::execLQHFRAGREQ()
2506 
2507 /* *************** */
2508 /*  ACCFRAGCONF  > */
2509 /* *************** */
execACCFRAGCONF(Signal * signal)2510 void Dblqh::execACCFRAGCONF(Signal* signal)
2511 {
2512   jamEntry();
2513   addfragptr.i = signal->theData[0];
2514   Uint32 taccConnectptr = signal->theData[1];
2515   //Uint32 fragId1 = signal->theData[2];
2516   Uint32 accFragPtr1 = signal->theData[4];
2517   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
2518   ndbrequire(addfragptr.p->addfragStatus == AddFragRecord::ACC_ADDFRAG);
2519 
2520   addfragptr.p->accConnectptr = taccConnectptr;
2521   fragptr.i = addfragptr.p->fragmentPtr;
2522   c_fragment_pool.getPtr(fragptr);
2523   fragptr.p->accFragptr = accFragPtr1;
2524 
2525   addfragptr.p->addfragStatus = AddFragRecord::WAIT_TUP;
2526   sendAddFragReq(signal);
2527 }//Dblqh::execACCFRAGCONF()
2528 
2529 /* *************** */
2530 /*  TUPFRAGCONF  > */
2531 /* *************** */
execTUPFRAGCONF(Signal * signal)2532 void Dblqh::execTUPFRAGCONF(Signal* signal)
2533 {
2534   jamEntry();
2535   addfragptr.i = signal->theData[0];
2536   Uint32 tupConnectptr = signal->theData[1];
2537   Uint32 tupFragPtr = signal->theData[2];  /* TUP FRAGMENT POINTER */
2538   //Uint32 localFragId = signal->theData[3];  /* LOCAL FRAGMENT ID    */
2539   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
2540   fragptr.i = addfragptr.p->fragmentPtr;
2541   c_fragment_pool.getPtr(fragptr);
2542   tabptr.i = fragptr.p->tabRef;
2543   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
2544 
2545   switch (addfragptr.p->addfragStatus) {
2546   case AddFragRecord::WAIT_TUP:
2547     jam();
2548     fragptr.p->tupFragptr = tupFragPtr;
2549     addfragptr.p->tupConnectptr = tupConnectptr;
2550     if (DictTabInfo::isOrderedIndex(tabptr.p->tableType))
2551     {
2552       addfragptr.p->addfragStatus = AddFragRecord::WAIT_TUX;
2553       sendAddFragReq(signal);
2554       break;
2555     }
2556     c_acc->set_tup_fragptr(fragptr.p->accFragptr, tupFragPtr);
2557     goto done_with_frag;
2558     break;
2559   case AddFragRecord::WAIT_TUX:
2560     jam();
2561     fragptr.p->tuxFragptr = tupFragPtr;
2562     addfragptr.p->tuxConnectptr = tupConnectptr;
2563     goto done_with_frag;
2564     break;
2565   done_with_frag:
2566     /* ---------------------------------------------------------------- */
2567     /* Finished create of fragments. Now ready for creating attributes. */
2568     /* ---------------------------------------------------------------- */
2569     fragptr.p->fragStatus = Fragrecord::FSACTIVE;
2570     {
2571       LqhFragConf* conf = (LqhFragConf*)signal->getDataPtrSend();
2572       conf->senderData = addfragptr.p->m_lqhFragReq.senderData;
2573       conf->lqhFragPtr = RNIL;
2574       conf->tableId = addfragptr.p->m_lqhFragReq.tableId;
2575       conf->fragId = fragptr.p->fragId;
2576       conf->changeMask = addfragptr.p->m_lqhFragReq.changeMask;
2577       sendSignal(addfragptr.p->m_lqhFragReq.senderRef, GSN_LQHFRAGCONF,
2578 		 signal, LqhFragConf::SignalLength, JBB);
2579     }
2580     releaseAddfragrec(signal);
2581     break;
2582   default:
2583     ndbabort();
2584   }
2585 }//Dblqh::execTUPFRAGCONF()
2586 
2587 /* *************** */
2588 /*  TUXFRAGCONF  > */
2589 /* *************** */
/**
 * TUXFRAGCONF is handled by the same code as TUPFRAGCONF: the signal is
 * passed unchanged to execTUPFRAGCONF(), which stores the pointers as
 * TUX pointers when the add-fragment record is in state WAIT_TUX.
 */
void Dblqh::execTUXFRAGCONF(Signal* signal)
{
  jamEntry();
  execTUPFRAGCONF(signal);
}//Dblqh::execTUXFRAGCONF
2595 
2596 /*
2597  * Add fragment in TUP or TUX.  Called up to 4 times.
2598  */
2599 void
sendAddFragReq(Signal * signal)2600 Dblqh::sendAddFragReq(Signal* signal)
2601 {
2602   fragptr.i = addfragptr.p->fragmentPtr;
2603   c_fragment_pool.getPtr(fragptr);
2604   tabptr.i = fragptr.p->tabRef;
2605   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
2606   if (addfragptr.p->addfragStatus == AddFragRecord::WAIT_TUP)
2607   {
2608     TupFragReq* const tupFragReq = (TupFragReq*)signal->getDataPtrSend();
2609     tupFragReq->userPtr = addfragptr.i;
2610     tupFragReq->userRef = cownref;
2611     tupFragReq->reqInfo = 0; /* ADD TABLE */
2612     tupFragReq->tableId = tabptr.i;
2613     tupFragReq->fragId = addfragptr.p->m_lqhFragReq.fragId;
2614     tupFragReq->tablespaceid = addfragptr.p->m_lqhFragReq.tablespace_id;
2615     tupFragReq->maxRowsHigh = addfragptr.p->m_lqhFragReq.maxRowsHigh;
2616     tupFragReq->maxRowsLow = addfragptr.p->m_lqhFragReq.maxRowsLow;
2617     tupFragReq->minRowsHigh = addfragptr.p->m_lqhFragReq.minRowsHigh;
2618     tupFragReq->minRowsLow = addfragptr.p->m_lqhFragReq.minRowsLow;
2619     tupFragReq->changeMask = addfragptr.p->m_lqhFragReq.changeMask;
2620     tupFragReq->partitionId = addfragptr.p->m_lqhFragReq.partitionId;
2621     sendSignal(fragptr.p->tupBlockref, GSN_TUPFRAGREQ,
2622                signal, TupFragReq::SignalLength, JBB);
2623     return;
2624   }
2625   if (addfragptr.p->addfragStatus == AddFragRecord::WAIT_TUX)
2626   {
2627     jam();
2628     ndbrequire(DictTabInfo::isOrderedIndex(tabptr.p->tableType));
2629     TuxFragReq* const tuxreq = (TuxFragReq*)signal->getDataPtrSend();
2630     tuxreq->userPtr = addfragptr.i;
2631     tuxreq->userRef = cownref;
2632     tuxreq->reqInfo = 0; /* ADD TABLE */
2633     tuxreq->tableId = tabptr.i;
2634     tuxreq->fragId = addfragptr.p->m_lqhFragReq.fragId;
2635     tuxreq->primaryTableId = tabptr.p->primaryTableId;
2636     // pointer to index fragment in TUP
2637     tuxreq->tupIndexFragPtrI = fragptr.p->tupFragptr;
2638     // pointers to table fragments in TUP and ACC
2639     FragrecordPtr tFragPtr;
2640     tFragPtr.i = fragptr.p->tableFragptr;
2641     c_fragment_pool.getPtr(tFragPtr);
2642     tuxreq->tupTableFragPtrI = tFragPtr.p->tupFragptr;
2643     tuxreq->accTableFragPtrI = tFragPtr.p->accFragptr;
2644     sendSignal(fragptr.p->tuxBlockref, GSN_TUXFRAGREQ,
2645                signal, TuxFragReq::SignalLength, JBB);
2646     return;
2647   }
2648 }//Dblqh::sendAddFragReq
2649 
2650 
2651 /* ************************************************************************>> */
2652 /*  TAB_COMMITREQ: Commit the new table for use in transactions. Sender DICT. */
2653 /* ************************************************************************>> */
insert_new_fragments_into_lcp(Signal * signal)2654 void Dblqh::insert_new_fragments_into_lcp(Signal *signal)
2655 {
2656   /**
2657    * When a disk data table is recovered its recovery will start by executing
2658    * the UNDO. The idea is that each fragment LCP records the LCP identity
2659    * that the fragment LCP will assist to restore. This means that the UNDO
2660    * log will at least be executed until the UNDO log record indicating the
2661    * start of this LCP id is found in the UNDO log. If no LCP have yet been
2662    * executed the UNDO log will be executed until the beginning of the UNDO
2663    * log.
2664    *
2665    * Thus we are safe that all UNDO logs will be executed to restore exactly
2666    * the page state at the moment the checkpoint was started. The LCP code in
2667    * the BACKUP block ensures that for each fragment LCP this is a well
2668    * defined time to ensure that the disk data record is synchronized with the
2669    * in-memory records. In particular this refers to the pointers from disk
2670    * data record to in-memory record and pointer from in-memory record to disk
2671    * data record.
2672    *
2673    * When a crash occurs and there is no fragment LCP yet executed for the
   * table we have a somewhat more complicated problem to handle. In this
2675    * case we want to UNDO everything until the table was created.
2676    *
2677    * If we do nothing special we cannot trust that the UNDO log is executed
2678    * to this point. As an example we could be creating the table while LCP
2679    * 14 is ongoing. However since the table didn't exist when LCP 14 was
2680    * started in the master DIH, there will not be any LCP_FRAG_ORD sent to
2681    * execute this fragment LCP. This means that the first LCP the table will
2682    * participate in is LCP 15.
2683    *
2684    * Now if the node crashes when all fragment LCPs from LCP 14 have completed
2685    * but not all of them have completed LCP 15 we are still safe since at least
2686    * one fragment LCP will require restore from LCP 14. Since the table was
2687    * created after the start of LCP 14 we know that the UNDO log execution will
2688    * undo all rows inserted since the table was created.
2689    *
2690    * However one case exists that we cannot handle. This is when all fragment
2691    * LCPs have completed  LCP 15 except the fragment LCPs of the new table.
2692    *
2693    * This is not necessarily a rare event since the new table is likely to get
2694    * the highest table id of all tables and thus its fragment LCPs are executed
2695    * after all other fragment LCPs have completed.
2696    *
2697    * There are many ways to solve this problem. One problem would be to ensure
2698    * that a fragment LCP has been executed before the API users are allowed to
2699    * execute any writes to the table.
2700    *
2701    * Another approach would be to know that all extents belonging to the table
2702    * must be returned at recovery and thus ensuring that we create an empty
2703    * set of disk data records which is the desired outcome.
2704    *
2705    * None of those approaches are well suited to the current code in NDB, they
2706    * would both require substantial changes to the LCP code and UNDO log
2707    * recovery code.
2708    *
2709    * The approach we have followed here is to ensure that the LCP is started
2710    * from LQH independent of when DIH decides to start it. This approach is
2711    * well aligned with the current development that makes LCPs more and more
2712    * local to LQH. LQH already maintains a queue of fragments to execute
2713    * fragment LCPs on.
2714    *
2715    * So the basic idea of our approach is to insert the fragment into the
2716    * queue of fragment LCPs. By so doing the above problem is no longer an
2717    * issue since we will execute the first fragment LCP already in LCP 14
2718    * and thus there is no chance that we can find ourself in the situation
2719    * described above.
2720    *
2721    * So now we need to describe a proof that our approach works.
2722    * The basis of this proof is to consider how the insert into the LCP
2723    * fragment queue is handled dependent on the progress of the LCPs.
2724    *
2725    * Case 1:
2726    * We create the table during execution of an LCP, say for example LCP 14.
2727    * We will insert the table last in the LCP fragment queue in this
2728    * function. This function is called from execTAB_COMMITREQ, this signal
2729    * is called before any write operation can start against the table.
2730    *
2731    * We conclude that since we insert the fragment into the LCP queue before
2732    * the LCP 14 is completed we know that the first fragment LCP of the new
2733    * table will happen in LCP 14. Thus recovery after LCP 15 have started is
2734    * trivial since there will a fragment LCP to restore.
2735    *
2736    * Thus the only case to consider is when the node crashes before the
2737    * fragment LCP has been made restorable. This happens before LCP 14 is
2738    * completed and thus any recovery will at least execute UNDO log records
2739    * until the start of LCP 14.
2740    *
2741    * One exception is that some fragment LCP might even require execution of
2742    * UNDO log records back to LCP 13 (or beginning of UNDO log if LCP 14 was
2743    * the first LCP in the UNDO log).
2744    *
2745    * Another very rare exception is that during recovery the new table is the
2746    * only fragment to restore (or that all fragments are new fragments
2747    * without an LCP to execute). This can happen in nodes added where not
2748    * even SYSTAB_0 is available.
2749    *
2750    * In this special case the LCP id that we will execute UNDO log unto is
2751    * LCP (0,0). There is a rare case that could cause issues here. This is
2752    * when we have had older tables that made us fill the UNDO log at least
2753    * once. In this case we might execute the UNDO log and never find the
2754    * UNDO log record referring to the end of the UNDO log and neither will
2755    * we find the LCP record of the LCP (0,0).
2756    *
2757    * In this we get the LCP identity from DIH by using SYSFILE->latestLCP_ID.
2758    * If SYSFILE->latestLCP_ID is 15 it means that LCP 14 was fully completed
2759    * before the crash. It is important that we grab the SYSFILE->latestLCP_ID
2760    * from the local node and not from the master DIH. This number is sent to
2761    * NDBCNTR in the early phases. We make sure that this LCP id is also
2762    * present in DBLQH to handle this rare special case.
2763    *
2764    * If NDBCNTR reports this number as 15, we know that LCP 14 was fully
2765    * completed before the crash. The only reason why LCP 14 might not be fully
2766    * completed is if we had a cluster restart that failed, but in that case we
2767    * have completed executing the UNDO log fully before we crashed the cluster
2768    * restart. Thus there will be no UNDO log execution required at all in this
2769    * case for the new tables since they have already been undone.
2770    *
2771    * Thus we have proven that with those two extra measures, first to insert
2772    * the fragment into the LCP queue before the CREATE TABLE completes and
2773    * by ensuring that DBLQH receives the latest LCP id we are sure that the
2774    * UNDO recovery of the new table will be performed in a correct manner.
2775    *
2776    * Case 2:
2777    * The second option is that the table is created after an LCP has
2778    * completed and before the next LCP has started.
2779    *
2780    * In this case we need to ensure that the fragment LCPs are not
2781    * executed at the end of the LCP. This comes natural by inserting
2782    * them into the LCP queue before any other fragment has been
2783    * inserted into the LCP queue.
2784    *
2785    * However we need another step to be safe in this case. The problem
2786    * here is that checkpoints are not completed one at a time, they are
2787    * executed in parallel. Especially the next fragment LCP is executed
2788    * while earlier fragment LCPs are still waiting for the UNDO log to
2789    * be written up to a certain LSN.
2790    *
2791    * To handle this in a safe we introduce a synchronisation point. All
2792    * fragment LCPs that are inserted when no LCP was ongoing are first
2793    * in the queue of fragment scheduled to execute an LCP. We introduce
2794    * a new signal that ensures that all of those fragment LCPs are
2795    * completed before we start with any fragment LCPs requested by DIH.
2796    *
2797    * This method ensures that the fragment LCPs for new tables are not
2798    * placed at the end of the LCP, rather it is placed even before any
2799    * other fragment LCP is executed. Thus a stronger condition than
2800    * required, but certainly strong enough.
2801    *
2802    * With this we are safe that if the table is created between LCP
2803    * 14 and 15, then the table have been checkpointed if any normal
2804    * table has its LCP executed for LCP 15. Thus we know that if
2805    * the recovery has a new table without a fragment LCP, then we know
2806    * that no fragment LCP has been executed in LCP 15. Thus we are
2807    * certain that the UNDO log will be executed back to the start of
2808    * LCP 14.
2809    *
2810    * Again the same reasoning as above on Case 1: also applies to
2811    * Case 2: when it comes to the case of no fragment with any
2812    * LCP executed on it.
2813    *
2814    * Thus we have proven the thesis that disk data recovery will be
2815    * ok for new table fragments that haven't executed their first LCP
2816    * at a node restart.
2817    *
2818    * A new table have been created. We must ensure that the table is
2819    * inserted into the list of waiting LCPs. The
2820    * m_create_table_flag_lcp_frag_ord indicates that there is no LCP_FRAG_ORD
2821    * outstanding for this fragment. The fragment is new and it should not be
2822    * possible for it to be part of any checkpoint yet. It will not be executed
2823    * until a checkpoint is started.
2824    *
2825    * m_create_table_insert_lcp indicates that this checkpoint requested has
2826    * been requested by a table creation. This only needs to be signalled when
2827    * no LCP is ongoing at the time when it is inserted. It is used to ensure
2828    * that the set of checkpoints on new tables between LCPs are executed AND
2829    * completed before any other checkpoints on any other fragments are
2830    * performed as described above.
2831    */
2832   lcpPtr.i = 0;
2833   ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
2834   for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tabptr.p->fragid); i++)
2835   {
2836     FragrecordPtr curr_fragptr;
2837     curr_fragptr.i = tabptr.p->fragrec[i];
2838     if (curr_fragptr.i != RNIL)
2839     {
2840       jam();
2841       c_fragment_pool.getPtr(curr_fragptr);
2842       jamLine(Uint16(curr_fragptr.p->fragId));
2843       ndbrequire(curr_fragptr.p->lcp_frag_ord_state ==
2844                  Fragrecord::LCP_EXECUTED);
2845       curr_fragptr.p->m_create_table_flag_lcp_frag_ord = true;
2846       curr_fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_QUEUED;
2847 
2848       DEB_EARLY_LCP(("(%u)Create table tab(%u,%u), insert into LCP, "
2849                      "LCP running: %u",
2850                      instance(),
2851                      curr_fragptr.p->tabRef,
2852                      curr_fragptr.p->fragId,
2853                      clcpCompletedState == LCP_RUNNING));
2854 
2855       c_queued_lcp_frag_ord.addLast(curr_fragptr);
2856       if (clcpCompletedState != LCP_RUNNING)
2857       {
2858         jam();
2859         curr_fragptr.p->m_create_table_insert_lcp = true;
2860       }
2861       else
2862       {
2863         jam();
2864         /**
2865          * It is possible that we have received all LCP_FRAG_ORD except
2866          * the last one containing lastFragmentFlag. Thus it is vital to
2867          * ensure that we kick-start the next fragment checkpoint here if
2868          * there is no LCP being neither prepared nor running.
2869          */
2870         if (is_lcp_idle(lcpPtr.p))
2871         {
2872           jam();
2873           ndbrequire(lcpPtr.p->lastFragmentFlag == false);
2874           prepare_next_fragment_checkpoint(signal, false);
2875         }
2876       }
2877     }
2878   }
2879 }
2880 
execTAB_COMMITREQ(Signal * signal)2881 void Dblqh::execTAB_COMMITREQ(Signal* signal)
2882 {
2883   jamEntry();
2884   Uint32 dihPtr = signal->theData[0];
2885   BlockReference dihBlockref = signal->theData[1];
2886   tabptr.i = signal->theData[2];
2887 
2888   if (tabptr.i >= ctabrecFileSize) {
2889     jam();
2890     terrorCode = ZTAB_FILE_SIZE;
2891     signal->theData[0] = dihPtr;
2892     signal->theData[1] = cownNodeid;
2893     signal->theData[2] = tabptr.i;
2894     signal->theData[3] = terrorCode;
2895     sendSignal(dihBlockref, GSN_TAB_COMMITREF, signal, 4, JBB);
2896     return;
2897   }//if
2898   ptrAss(tabptr, tablerec);
2899   if (tabptr.p->tableStatus != Tablerec::ADD_TABLE_ONGOING) {
2900     jam();
2901     terrorCode = ZTAB_STATE_ERROR;
2902     signal->theData[0] = dihPtr;
2903     signal->theData[1] = cownNodeid;
2904     signal->theData[2] = tabptr.i;
2905     signal->theData[3] = terrorCode;
2906     signal->theData[4] = tabptr.p->tableStatus;
2907     sendSignal(dihBlockref, GSN_TAB_COMMITREF, signal, 5, JBB);
2908     ndbabort();
2909     return;
2910   }//if
2911   if (cstartPhase == ZNIL &&
2912       !DictTabInfo::isOrderedIndex(tabptr.p->tableType))
2913   {
2914     jam();
2915     insert_new_fragments_into_lcp(signal);
2916   }
2917   tabptr.p->usageCountR = 0;
2918   tabptr.p->usageCountW = 0;
2919   tabptr.p->tableStatus = Tablerec::TABLE_DEFINED;
2920   DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = TABLE_DEFINED",
2921                       instance(),
2922                       tabptr.i));
2923   c_pgman->set_table_ready_for_prep_lcp_writes(tabptr.i, true);
2924   signal->theData[0] = dihPtr;
2925   signal->theData[1] = cownNodeid;
2926   signal->theData[2] = tabptr.i;
2927   sendSignal(dihBlockref, GSN_TAB_COMMITCONF, signal, 3, JBB);
2928 
2929   return;
2930 }//Dblqh::execTAB_COMMITREQ()
2931 
2932 
fragrefLab(Signal * signal,Uint32 errorCode,const LqhFragReq * req)2933 void Dblqh::fragrefLab(Signal* signal,
2934                        Uint32 errorCode,
2935                        const LqhFragReq* req)
2936 {
2937   LqhFragRef * ref = (LqhFragRef*)signal->getDataPtrSend();
2938   ref->senderData = req->senderData;
2939   ref->errorCode = errorCode;
2940   ref->requestInfo = req->requestInfo;
2941   ref->tableId = req->tableId;
2942   ref->fragId = req->fragId;
2943   ref->changeMask = req->changeMask;
2944   sendSignal(req->senderRef, GSN_LQHFRAGREF, signal,
2945 	     LqhFragRef::SignalLength, JBB);
2946   return;
2947 }//Dblqh::fragrefLab()
2948 
2949 /*
2950  * Abort on-going ops.
2951  */
abortAddFragOps(Signal * signal)2952 void Dblqh::abortAddFragOps(Signal* signal)
2953 {
2954   if (addfragptr.p->tupConnectptr != RNIL) {
2955     jam();
2956     TupFragReq* const tupFragReq = (TupFragReq*)signal->getDataPtrSend();
2957     tupFragReq->userPtr = (Uint32)-1;
2958     tupFragReq->userRef = addfragptr.p->tupConnectptr;
2959     sendSignal(ctupBlockref, GSN_TUPFRAGREQ, signal, 2, JBB);
2960     addfragptr.p->tupConnectptr = RNIL;
2961   }
2962   if (addfragptr.p->tuxConnectptr != RNIL) {
2963     jam();
2964     TuxFragReq* const tuxFragReq = (TuxFragReq*)signal->getDataPtrSend();
2965     tuxFragReq->userPtr = (Uint32)-1;
2966     tuxFragReq->userRef = addfragptr.p->tuxConnectptr;
2967     sendSignal(ctuxBlockref, GSN_TUXFRAGREQ, signal, 2, JBB);
2968     addfragptr.p->tuxConnectptr = RNIL;
2969   }
2970 }
2971 
2972 /* ************>> */
2973 /*  ACCFRAGREF  > */
2974 /* ************>> */
execACCFRAGREF(Signal * signal)2975 void Dblqh::execACCFRAGREF(Signal* signal)
2976 {
2977   jamEntry();
2978   addfragptr.i = signal->theData[0];
2979   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
2980   Uint32 errorCode = terrorCode = signal->theData[1];
2981   ndbrequire(addfragptr.p->addfragStatus == AddFragRecord::ACC_ADDFRAG);
2982 
2983   fragrefLab(signal, errorCode, &addfragptr.p->m_lqhFragReq);
2984   releaseAddfragrec(signal);
2985 
2986   return;
2987 }//Dblqh::execACCFRAGREF()
2988 
2989 /* ************>> */
2990 /*  TUPFRAGREF  > */
2991 /* ************>> */
execTUPFRAGREF(Signal * signal)2992 void Dblqh::execTUPFRAGREF(Signal* signal)
2993 {
2994   jamEntry();
2995   addfragptr.i = signal->theData[0];
2996   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
2997   Uint32 errorCode = terrorCode = signal->theData[1];
2998   fragptr.i = addfragptr.p->fragmentPtr;
2999   c_fragment_pool.getPtr(fragptr);
3000 
3001   // no operation to release, just add some jams
3002   switch (addfragptr.p->addfragStatus) {
3003   case AddFragRecord::WAIT_TUP:
3004     jam();
3005     break;
3006   case AddFragRecord::WAIT_TUX:
3007     jam();
3008     break;
3009   default:
3010     ndbabort();
3011   }
3012 
3013   fragrefLab(signal, errorCode, &addfragptr.p->m_lqhFragReq);
3014   releaseAddfragrec(signal);
3015 
3016 }//Dblqh::execTUPFRAGREF()
3017 
3018 void
execDROP_FRAG_REQ(Signal * signal)3019 Dblqh::execDROP_FRAG_REQ(Signal* signal)
3020 {
3021   DropFragReq *req = (DropFragReq*)signal->getDataPtr();
3022   seizeAddfragrec(signal);
3023   addfragptr.p->m_dropFragReq = *req;
3024 
3025   /**
3026    * 1 - self
3027    * 2 - acc
3028    * 3 - tup
3029    * 4 - tux (optional)
3030    */
3031   tabptr.i = req->tableId;
3032   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
3033 
3034   deleteFragrec(req->fragId);
3035 
3036   Uint32 ref = calcInstanceBlockRef(DBACC);
3037   if (DictTabInfo::isOrderedIndex(tabptr.p->tableType))
3038   {
3039     jam();
3040     ref = calcInstanceBlockRef(DBTUP);
3041   }
3042 
3043   req->senderRef = reference();
3044   req->senderData = addfragptr.i;
3045   sendSignal(ref, GSN_DROP_FRAG_REQ, signal, DropFragReq::SignalLength, JBB);
3046 }
3047 
3048 void
execDROP_FRAG_REF(Signal * signal)3049 Dblqh::execDROP_FRAG_REF(Signal* signal)
3050 {
3051   ndbabort();
3052 }
3053 
3054 void
execDROP_FRAG_CONF(Signal * signal)3055 Dblqh::execDROP_FRAG_CONF(Signal* signal)
3056 {
3057   DropFragConf* conf = (DropFragConf*)signal->getDataPtr();
3058   addfragptr.i = conf->senderData;
3059   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
3060 
3061   Uint32 ref = RNIL;
3062   switch(refToMain(conf->senderRef)){
3063   case DBACC:
3064     jam();
3065     ref = calcInstanceBlockRef(DBTUP);
3066     break;
3067   case DBTUP:
3068   {
3069     tabptr.i = addfragptr.p->m_dropFragReq.tableId;
3070     ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
3071     if (DictTabInfo::isOrderedIndex(tabptr.p->tableType))
3072     {
3073       jam();
3074       ref = calcInstanceBlockRef(DBTUX);
3075     }
3076     break;
3077   }
3078   case DBTUX:
3079     break;
3080   default:
3081     ndbabort();
3082   }
3083 
3084   if (ref != RNIL)
3085   {
3086     DropFragReq* req = (DropFragReq*)signal->getDataPtrSend();
3087     * req = addfragptr.p->m_dropFragReq;
3088     req->senderRef = reference();
3089     req->senderData = addfragptr.i;
3090     sendSignal(ref, GSN_DROP_FRAG_REQ, signal, DropFragReq::SignalLength,
3091                JBB);
3092     return;
3093   }
3094 
3095   conf->senderRef = reference();
3096   conf->senderData = addfragptr.p->m_dropFragReq.senderData;
3097   conf->tableId = addfragptr.p->m_dropFragReq.tableId;
3098   conf->fragId = addfragptr.p->m_dropFragReq.fragId;
3099   sendSignal(addfragptr.p->m_dropFragReq.senderRef, GSN_DROP_FRAG_CONF,
3100              signal, DropFragConf::SignalLength, JBB);
3101 
3102   releaseAddfragrec(signal);
3103 }
3104 
3105 /* ************>> */
3106 /*  TUXFRAGREF  > */
3107 /* ************>> */
execTUXFRAGREF(Signal * signal)3108 void Dblqh::execTUXFRAGREF(Signal* signal)
3109 {
3110   jamEntry();
3111   execTUPFRAGREF(signal);
3112 }//Dblqh::execTUXFRAGREF
3113 
3114 void
execPREP_DROP_TAB_REQ(Signal * signal)3115 Dblqh::execPREP_DROP_TAB_REQ(Signal* signal){
3116   jamEntry();
3117 
3118   PrepDropTabReq* req = (PrepDropTabReq*)signal->getDataPtr();
3119 
3120   Uint32 senderRef = req->senderRef;
3121   Uint32 senderData = req->senderData;
3122 
3123   TablerecPtr tabPtr;
3124   tabPtr.i = req->tableId;
3125   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
3126 
3127   Uint32 errCode = 0;
3128   switch(tabPtr.p->tableStatus) {
3129   case Tablerec::TABLE_DEFINED:
3130     jam();
3131     break;
3132   case Tablerec::NOT_DEFINED:
3133     jam();
3134     errCode = PrepDropTabRef::NoSuchTable;
3135     break;
3136   case Tablerec::ADD_TABLE_ONGOING:
3137     jam();
3138     errCode = PrepDropTabRef::NoSuchTable;
3139     break;
3140   case Tablerec::PREP_DROP_TABLE_DONE:
3141     jam();
3142     errCode = PrepDropTabRef::DropInProgress;
3143     break;
3144   case Tablerec::DROP_TABLE_WAIT_USAGE:
3145   case Tablerec::DROP_TABLE_WAIT_DONE:
3146   case Tablerec::DROP_TABLE_ACC:
3147   case Tablerec::DROP_TABLE_TUP:
3148   case Tablerec::DROP_TABLE_TUX:
3149     jam();
3150     errCode = PrepDropTabRef::DropInProgress;
3151     break;
3152   case Tablerec::TABLE_READ_ONLY:
3153     jam();
3154     errCode = PrepDropTabRef::InvalidTableState;
3155     break;
3156   }
3157 
3158   if(errCode != 0)
3159   {
3160     jam();
3161 
3162     PrepDropTabRef* ref = (PrepDropTabRef*)signal->getDataPtrSend();
3163     ref->senderRef = reference();
3164     ref->senderData = senderData;
3165     ref->tableId = tabPtr.i;
3166     ref->errorCode = errCode;
3167     sendSignal(senderRef, GSN_PREP_DROP_TAB_REF, signal,
3168 	       PrepDropTabRef::SignalLength, JBB);
3169     return;
3170   }
3171 
3172   tabPtr.p->m_senderData = senderData;
3173   tabPtr.p->m_senderRef = senderRef;
3174   c_pgman->set_table_ready_for_prep_lcp_writes(tabPtr.i, false);
3175   check_pgman_prep_lcp_active_prep_drop_tab(signal, tabPtr.i);
3176 }
3177 
3178 /**
3179  * In PGMAN we have a feature that writes data pages before the actual
3180  * checkpoint happens. These pages are tagged with PREP_LCP as the state.
3181  * We need to ensure that no such writes are outstanding for a table
3182  * that is being prepared to drop. We will only write such pages when
3183  * the table is active in PGMAN. We declare it as inactive when we start
3184  * preparing to drop the table.
3185  */
3186 void
check_pgman_prep_lcp_active_prep_drop_tab(Signal * signal,Uint32 tabPtrI)3187 Dblqh::check_pgman_prep_lcp_active_prep_drop_tab(Signal *signal,
3188                                                  Uint32 tabPtrI)
3189 {
3190   if (c_pgman->is_prep_lcp_writes_outstanding(tabPtrI))
3191   {
3192     jam();
3193     signal->theData[0] = ZPGMAN_PREP_LCP_ACTIVE_CHECK;
3194     signal->theData[1] = tabPtrI;
3195     signal->theData[2] = 0;
3196     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 3);
3197     return;
3198   }
3199 
3200   TablerecPtr tabPtr;
3201   tabPtr.i = tabPtrI;
3202   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
3203   tabPtr.p->tableStatus = Tablerec::PREP_DROP_TABLE_DONE;
3204   DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = PREP_DROP_TABLE_DONE(2)",
3205                       instance(),
3206                       tabPtr.i));
3207   PrepDropTabConf * conf = (PrepDropTabConf*)signal->getDataPtrSend();
3208   conf->tableId = tabPtrI;
3209   conf->senderRef = reference();
3210   conf->senderData = tabPtr.p->m_senderData;
3211   sendSignal(tabPtr.p->m_senderRef, GSN_PREP_DROP_TAB_CONF, signal,
3212 	     PrepDropTabConf::SignalLength, JBB);
3213 }
3214 
3215 void
execINFORM_BACKUP_DROP_TAB_CONF(Signal * signal)3216 Dblqh::execINFORM_BACKUP_DROP_TAB_CONF(Signal *signal)
3217 {
3218   TablerecPtr tabPtr;
3219   tabPtr.i = signal->theData[0];
3220   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
3221   tabPtr.p->m_informed_backup_drop_tab = true;
3222 }
3223 
3224 void
check_pgman_prep_lcp_active_drop_tab(Signal * signal,Uint32 tabPtrI)3225 Dblqh::check_pgman_prep_lcp_active_drop_tab(Signal *signal,
3226                                             Uint32 tabPtrI)
3227 {
3228   if (c_pgman->is_prep_lcp_writes_outstanding(tabPtrI))
3229   {
3230     jam();
3231     signal->theData[0] = ZPGMAN_PREP_LCP_ACTIVE_CHECK;
3232     signal->theData[1] = tabPtrI;
3233     signal->theData[2] = 1;
3234     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 3);
3235     return;
3236   }
3237   TablerecPtr tabPtr;
3238   tabPtr.i = tabPtrI;
3239   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
3240   tabPtr.p->tableStatus = Tablerec::DROP_TABLE_WAIT_USAGE;
3241   DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = DROP_TABLE_WAIT_USAGE(2)",
3242                       instance(),
3243                       tabPtr.i));
3244   signal->theData[0] = ZDROP_TABLE_WAIT_USAGE;
3245   signal->theData[1] = tabPtrI;
3246   signal->theData[2] = tabPtr.p->m_senderRef;
3247   signal->theData[3] = tabPtr.p->m_addfragptr_i;
3248   dropTab_wait_usage(signal);
3249 }
3250 
/**
 * Wait step of a table drop: hold the drop until the table is unused, no
 * fragment LCP is executing on it and BACKUP has been informed.
 *
 * Input is read from the signal: theData[1] = table id,
 * theData[2] = sender reference, theData[3] = sender data. The table must
 * be in DROP_TABLE_WAIT_USAGE.
 *
 * Steps, each of which may reschedule this function through a delayed
 * CONTINUEB carrying ZDROP_TABLE_WAIT_USAGE:
 *  1. Wait until both usage counters are zero.
 *  2. Remove the table's fragments from the LCP queue, and wait while any
 *     of them is executing an LCP.
 *  3. Send INFORM_BACKUP_DROP_TAB_REQ and wait until
 *     m_informed_backup_drop_tab has been set.
 *  4. Set DROP_TABLE_WAIT_DONE and either send DROP_TAB_CONF (no
 *     add-fragment record attached) or continue in dropTable_nextStep().
 */
void
Dblqh::dropTab_wait_usage(Signal* signal){

  TablerecPtr tabPtr;
  FragrecordPtr loc_fragptr;
  tabPtr.i = signal->theData[1];
  ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);

  Uint32 senderRef = signal->theData[2];
  Uint32 senderData = signal->theData[3];

  ndbrequire(tabPtr.p->tableStatus == Tablerec::DROP_TABLE_WAIT_USAGE);

  if (tabPtr.p->usageCountR > 0 || tabPtr.p->usageCountW > 0)
  {
    /**
     * The table is still in use. The first four signal words are resent
     * as they are; nothing has overwritten them at this point.
     */
    jam();
    sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 4);
    return;
  }

  bool lcpDone = true;
  lcpPtr.i = 0;
  ptrAss(lcpPtr, lcpRecord);
  if (!is_lcp_idle(lcpPtr.p))
  {
    /* is_lcp_idle() is false: check the LCP state of every fragment. */
    jam();

    for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tabPtr.p->fragrec); i++)
    {
      jam();
      loc_fragptr.i = tabPtr.p->fragrec[i];
      if (loc_fragptr.i != RNIL)
      {
        jam();
        c_fragment_pool.getPtr(loc_fragptr);
        if ((loc_fragptr.p->lcp_frag_ord_state == Fragrecord::LCP_QUEUED) &&
            (!ERROR_INSERTED(5089)))
        {
          /**
           * The fragment is queued up for an LCP scan, but it hasn't
           * started yet. In this case the LCP scan will be faked anyways,
           * so we will remove it from the queue immediately and fake its
           * completion. The only reason to send this signal is to ensure
           * that DIH and other blocks that wait for this REP signal can
           * keep track of the outstanding number of outstanding signals.
           * It will be dropped immediately after that when received in
           * DIH since the table is being dropped.
           */
          LcpRecord::FragOrd fragOrd;
          jam();
          CLEAR_ERROR_INSERT_VALUE;
          c_queued_lcp_frag_ord.remove(loc_fragptr);
          loc_fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_EXECUTED;

          if (!loc_fragptr.p->m_create_table_flag_lcp_frag_ord)
          {
            /* Flag not set: report the faked completion via LCP_FRAG_REP. */
            jam();
            fragOrd.lcpFragOrd.lcpNo = loc_fragptr.p->lcp_frag_ord_lcp_no;
            fragOrd.lcpFragOrd.lcpId = loc_fragptr.p->lcp_frag_ord_lcp_id;
            fragOrd.lcpFragOrd.fragmentId = loc_fragptr.p->fragId;
            fragOrd.lcpFragOrd.tableId = loc_fragptr.p->tabRef;
            sendLCP_FRAG_REP(signal, fragOrd, loc_fragptr.p);
          }
          else
          {
            /* Queued by table creation only: no LCP_FRAG_REP is sent. */
            DEB_EARLY_LCP(("(%u)Remove tab(%u,%u) with flag for lcp_frag_ord",
                           instance(),
                           loc_fragptr.p->tabRef,
                           loc_fragptr.p->fragId));
          }
          loc_fragptr.p->m_create_table_flag_lcp_frag_ord = false;
          loc_fragptr.p->m_create_table_insert_lcp = false;
        }
        else if (loc_fragptr.p->lcp_frag_ord_state ==
                 Fragrecord::LCP_EXECUTING)
        {
          /**
           * The LCP scan is ongoing, we need to make sure it has completed
           * before we can drop the table. Thus we need to continue the
           * wait for a while longer.
           */
          jam();
          CLEAR_ERROR_INSERT_VALUE;
          lcpDone = false;
        }
        else if (ERROR_INSERTED(5088) || ERROR_INSERTED(5089))
        {
          /**
           * Delay drop table until we reach either LCP_QUEUED or
           * LCP_EXECUTING.
           */
          jam();
          lcpDone = false;
        }
      }
    }
  }
  else
  {
    /**
     * is_lcp_idle() is true: a fragment still in LCP_QUEUED is required to
     * carry m_create_table_flag_lcp_frag_ord; it is taken off the queue.
     */
    for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tabPtr.p->fragrec); i++)
    {
      loc_fragptr.i = tabPtr.p->fragrec[i];
      if (loc_fragptr.i != RNIL)
      {
        c_fragment_pool.getPtr(loc_fragptr);
        if (loc_fragptr.p->lcp_frag_ord_state == Fragrecord::LCP_QUEUED)
        {
          jam();
          jamLine(Uint16(loc_fragptr.p->fragId));
          ndbrequire(loc_fragptr.p->m_create_table_flag_lcp_frag_ord == true);
          c_queued_lcp_frag_ord.remove(loc_fragptr);
          loc_fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_EXECUTED;
          loc_fragptr.p->m_create_table_flag_lcp_frag_ord = false;
          loc_fragptr.p->m_create_table_insert_lcp = false;
          DEB_EARLY_LCP(("(%u)Remove tab(%u,%u) with flag for lcp_frag_ord, "
                         "No LCP running",
                         instance(),
                         loc_fragptr.p->tabRef,
                         loc_fragptr.p->fragId));
        }
      }
    }
    if (ERROR_INSERTED(5088) || ERROR_INSERTED(5089))
    {
      /* The error inserts are cleared here, nothing is delayed. */
      jam();
      CLEAR_ERROR_INSERT_VALUE;
    }
  }

  if(!lcpDone)
  {
    /* At least one fragment must still be waited for: retry later. */
    jam();
    signal->theData[0] = ZDROP_TABLE_WAIT_USAGE;
    signal->theData[1] = tabPtr.i;
    signal->theData[2] = senderRef;
    signal->theData[3] = senderData;
    sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 10, 4);
    return;
  }
  if (!tabPtr.p->m_informed_backup_drop_tab)
  {
    /**
     * Schedule the re-check first, then reuse theData for the request to
     * BACKUP. The flag is set in execINFORM_BACKUP_DROP_TAB_CONF.
     */
    jam();
    signal->theData[0] = ZDROP_TABLE_WAIT_USAGE;
    signal->theData[1] = tabPtr.i;
    signal->theData[2] = senderRef;
    signal->theData[3] = senderData;
    sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 1, 4);

    signal->theData[0] = tabPtr.i;
    signal->theData[1] = reference();
    BlockReference backupRef = calcInstanceBlockRef(BACKUP);
    sendSignal(backupRef, GSN_INFORM_BACKUP_DROP_TAB_REQ, signal, 2, JBB);
    return;
  }
  tabPtr.p->tableStatus = Tablerec::DROP_TABLE_WAIT_DONE;
  DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = DROP_TABLE_WAIT_DONE",
                      instance(),
                      tabPtr.i));

  if (tabPtr.p->m_addfragptr_i == RNIL)
  {
    /* No add-fragment record attached: confirm directly to the sender. */
    jam();
    DropTabConf * conf = (DropTabConf*)signal->getDataPtrSend();
    conf->tableId = tabPtr.i;
    conf->senderRef = reference();
    conf->senderData = senderData;
    sendSignal(senderRef, GSN_DROP_TAB_CONF, signal,
	       DropTabConf::SignalLength, JBB);
  }
  else
  {
    /* senderData is used as the add-fragment record index on this path. */
    jam();
    Ptr<AddFragRecord> addFragPtr;
    addFragPtr.i = senderData;
    ptrCheckGuard(addFragPtr, caddfragrecFileSize, addFragRecord);
    dropTable_nextStep(signal, addFragPtr);
  }
}
3429 
3430 void
execDROP_TAB_REQ(Signal * signal)3431 Dblqh::execDROP_TAB_REQ(Signal* signal){
3432   jamEntry();
3433   if (ERROR_INSERTED(5076))
3434   {
3435     /**
3436      * This error insert simulates a situation where it takes a long time
3437      * to execute DROP_TAB_REQ, such that we can crash the (dict) master
3438      * while there is an outstanding DROP_TAB_REQ.
3439      */
3440     jam();
3441     CLEAR_ERROR_INSERT_VALUE;
3442     sendSignalWithDelay(reference(), GSN_DROP_TAB_REQ, signal, 10000,
3443                         signal->getLength());
3444     return;
3445   }
3446   if (ERROR_INSERTED(5077))
3447   {
3448     jam();
3449     CLEAR_ERROR_INSERT_VALUE;
3450     /**
3451      * Kill this node 2 seconds from now. We wait for two seconds to make sure
3452      * that DROP_TAB_REQ messages have reached other nodes before this one
3453      * dies.
3454      */
3455     signal->theData[0] = 9999;
3456     sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 2000, 1);
3457     return;
3458   }
3459   DropTabReq reqCopy = * (DropTabReq*)signal->getDataPtr();
3460   DropTabReq* req = &reqCopy;
3461 
3462   TablerecPtr tabPtr;
3463   tabPtr.i = req->tableId;
3464   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
3465 
3466   Uint32 errCode = 0;
3467   switch((DropTabReq::RequestType)req->requestType) {
3468   case DropTabReq::RestartDropTab:
3469     jam();
3470     // Fall through
3471   case DropTabReq::CreateTabDrop:
3472     if (tabPtr.p->tableStatus == Tablerec::TABLE_DEFINED)
3473     {
3474       jam();
3475       ndbrequire(tabPtr.p->usageCountR == 0 && tabPtr.p->usageCountW == 0);
3476       seizeAddfragrec(signal);
3477       tabPtr.p->m_addfragptr_i = addfragptr.i;
3478       addfragptr.p->m_dropTabReq = * req;
3479       tabPtr.p->m_informed_backup_drop_tab = false;
3480       tabPtr.p->m_senderRef = req->senderRef;
3481       c_pgman->set_table_ready_for_prep_lcp_writes(tabPtr.i, false);
3482       check_pgman_prep_lcp_active_drop_tab(signal, tabPtr.i);
3483       return;
3484     }
3485     else
3486     {
3487       jam();
3488       tabPtr.p->tableStatus = Tablerec::DROP_TABLE_WAIT_DONE;
3489       DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = DROP_TABLE_WAIT_DONE(2)",
3490                           instance(),
3491                           tabPtr.i));
3492     }
3493     break;
3494   case DropTabReq::OnlineDropTab:
3495     jam();
3496     switch(tabPtr.p->tableStatus) {
3497     case Tablerec::TABLE_DEFINED:
3498       jam();
3499       errCode = DropTabRef::DropWoPrep;
3500       break;
3501     case Tablerec::NOT_DEFINED:
3502       jam();
3503       errCode = DropTabRef::NoSuchTable;
3504       break;
3505     case Tablerec::ADD_TABLE_ONGOING:
3506       jam();
3507       ndbassert(false);
3508       // Fall through
3509     case Tablerec::PREP_DROP_TABLE_DONE:
3510       jam();
3511       tabPtr.p->m_informed_backup_drop_tab = false;
3512       tabPtr.p->tableStatus = Tablerec::DROP_TABLE_WAIT_USAGE;
3513       DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = DROP_TABLE_WAIT_USAGE",
3514                           instance(),
3515                           tabPtr.i));
3516       signal->theData[0] = ZDROP_TABLE_WAIT_USAGE;
3517       signal->theData[1] = tabPtr.i;
3518       signal->theData[2] = req->senderRef;
3519       signal->theData[3] = req->senderData;
3520       dropTab_wait_usage(signal);
3521       return;
3522       break;
3523     case Tablerec::DROP_TABLE_WAIT_USAGE:
3524     case Tablerec::DROP_TABLE_ACC:
3525     case Tablerec::DROP_TABLE_TUP:
3526     case Tablerec::DROP_TABLE_TUX:
3527       ndbabort();
3528     case Tablerec::DROP_TABLE_WAIT_DONE:
3529       jam();
3530       break;
3531     case Tablerec::TABLE_READ_ONLY:
3532       jam();
3533       errCode = DropTabRef::InvalidTableState;
3534       break;
3535     }
3536   }
3537 
3538   if (errCode)
3539   {
3540     jam();
3541     DropTabRef * ref = (DropTabRef*)signal->getDataPtrSend();
3542     ref->tableId = tabPtr.i;
3543     ref->senderRef = reference();
3544     ref->senderData = req->senderData;
3545     ref->errorCode = errCode;
3546     sendSignal(req->senderRef, GSN_DROP_TAB_REF, signal,
3547                DropTabRef::SignalLength, JBB);
3548     return;
3549   }
3550 
3551   ndbrequire(tabPtr.p->usageCountR == 0 && tabPtr.p->usageCountW == 0);
3552   seizeAddfragrec(signal);
3553   addfragptr.p->m_dropTabReq = * req;
3554   dropTable_nextStep(signal, addfragptr);
3555 }
3556 
3557 void
execDROP_TAB_REF(Signal * signal)3558 Dblqh::execDROP_TAB_REF(Signal* signal)
3559 {
3560   jamEntry();
3561   DropTabRef * ref = (DropTabRef*)signal->getDataPtr();
3562 
3563 #if defined ERROR_INSERT || defined VM_TRACE
3564   jamLine(ref->errorCode);
3565   ndbabort();
3566 #endif
3567 
3568   Ptr<AddFragRecord> addFragPtr;
3569   addFragPtr.i = ref->senderData;
3570   ptrCheckGuard(addFragPtr, caddfragrecFileSize, addFragRecord);
3571   dropTable_nextStep(signal, addFragPtr);
3572 }
3573 
3574 void
execDROP_TAB_CONF(Signal * signal)3575 Dblqh::execDROP_TAB_CONF(Signal* signal)
3576 {
3577   jamEntry();
3578   DropTabConf * conf = (DropTabConf*)signal->getDataPtr();
3579 
3580   Ptr<AddFragRecord> addFragPtr;
3581   addFragPtr.i = conf->senderData;
3582   ptrCheckGuard(addFragPtr, caddfragrecFileSize, addFragRecord);
3583   dropTable_nextStep(signal, addFragPtr);
3584 }
3585 
3586 void
dropTable_nextStep(Signal * signal,Ptr<AddFragRecord> addFragPtr)3587 Dblqh::dropTable_nextStep(Signal* signal, Ptr<AddFragRecord> addFragPtr)
3588 {
3589   jam();
3590 
3591   TablerecPtr tabPtr;
3592   tabPtr.i = addFragPtr.p->m_dropTabReq.tableId;
3593   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
3594 
3595   Uint32 ref = 0;
3596   if (tabPtr.p->tableStatus == Tablerec::DROP_TABLE_WAIT_DONE)
3597   {
3598     jam();
3599     if (DictTabInfo::isTable(tabPtr.p->tableType) ||
3600         DictTabInfo::isHashIndex(tabPtr.p->tableType))
3601     {
3602       jam();
3603       ref = calcInstanceBlockRef(DBACC);
3604       tabPtr.p->tableStatus = Tablerec::DROP_TABLE_ACC;
3605       DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = DROP_TABLE_ACC",
3606                           instance(),
3607                           tabPtr.i));
3608     }
3609     else
3610     {
3611       jam();
3612       ref = calcInstanceBlockRef(DBTUP);
3613       tabPtr.p->tableStatus = Tablerec::DROP_TABLE_TUP;
3614       DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = DROP_TABLE_TUP",
3615                           instance(),
3616                           tabPtr.i));
3617     }
3618   }
3619   else if (tabPtr.p->tableStatus == Tablerec::DROP_TABLE_ACC)
3620   {
3621     jam();
3622     ref = calcInstanceBlockRef(DBTUP);
3623     tabPtr.p->tableStatus = Tablerec::DROP_TABLE_TUP;
3624     DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = DROP_TABLE_TUP(2)",
3625                         instance(),
3626                         tabPtr.i));
3627   }
3628   else if (tabPtr.p->tableStatus == Tablerec::DROP_TABLE_TUP)
3629   {
3630     jam();
3631     if (DictTabInfo::isOrderedIndex(tabPtr.p->tableType))
3632     {
3633       jam();
3634       ref = calcInstanceBlockRef(DBTUX);
3635       tabPtr.p->tableStatus = Tablerec::DROP_TABLE_TUX;
3636       DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = DROP_TABLE_TUX",
3637                           instance(),
3638                           tabPtr.i));
3639     }
3640   }
3641 
3642   if (ref)
3643   {
3644     jam();
3645     DropTabReq* req = (DropTabReq*)signal->getDataPtrSend();
3646     req->senderData = addFragPtr.i;
3647     req->senderRef = reference();
3648     req->tableId = tabPtr.i;
3649     req->tableVersion = tabPtr.p->schemaVersion;
3650     req->requestType = addFragPtr.p->m_dropTabReq.requestType;
3651     sendSignal(ref, GSN_DROP_TAB_REQ, signal,
3652                DropTabReq::SignalLength, JBB);
3653     return;
3654   }
3655 
3656   removeTable(tabPtr.i);
3657   tabPtr.p->m_addfragptr_i = RNIL;
3658   tabPtr.p->tableStatus = Tablerec::NOT_DEFINED;
3659   DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = NOT_DEFINED",
3660                        instance(),
3661                        tabPtr.i));
3662 
3663   DropTabConf* conf = (DropTabConf*)signal->getDataPtrSend();
3664   conf->senderRef = reference();
3665   conf->senderData = addFragPtr.p->m_dropTabReq.senderData;
3666   conf->tableId = tabPtr.i;
3667   sendSignal(addFragPtr.p->m_dropTabReq.senderRef, GSN_DROP_TAB_CONF, signal,
3668              DropTabConf::SignalLength, JBB);
3669 
3670   addfragptr = addFragPtr;
3671   releaseAddfragrec(signal);
3672 }
3673 
removeTable(Uint32 tableId)3674 void Dblqh::removeTable(Uint32 tableId)
3675 {
3676   tabptr.i = tableId;
3677   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
3678 
3679   for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tabptr.p->fragid); i++) {
3680     jam();
3681     if (tabptr.p->fragid[i] != ZNIL) {
3682       jam();
3683       deleteFragrec(tabptr.p->fragid[i]);
3684     }//if
3685   }//for
3686 }//Dblqh::removeTable()
3687 
3688 /**
3689  * When adding a set of new columns to a table the row size grows.
3690  * This can have a bad effect on ongoing LCP scans. So therefore
3691  * we need to wait to change the table metadata until we are sure
3692  * that it is safe to change this parameter.
3693  *
3694  * It is safe if no LCP execution is ongoing on the table.
3695  *
3696  * It is safe when returning from executing an LCP since only one
3697  * at a time can execute an LCP (we can have another LCP in prepare
3698  * phase, but only one at a time in execution phase).
3699  *
3700  * We have also made it safe as soon as the LCP scan returns
3701  * a SCAN_FRAGCONF. We will check if it is necessary to change
3702  * the max record size of the table before we decide whether to
3703  * continue executing an LCP scan.
3704  *
3705  * The max record size is used to ensure that the LCP scan have
3706  * buffer space to receive at least 16 rows with maximum size.
3707  * This is checked before executing the next SCAN_NEXTREQ or SCAN_FRAGREQ,
3708  * so changing the max record size immediately after receiving SCAN_FRAGCONF
3709  * is a working solution. This means that at most we have to wait
3710  * for a scan of 16 rows, so normally the wait here should be very
3711  * small and practically unnoticeable for all practical purposes.
3712  *
3713  * The real-time break should not constitute any issue here since
3714  * we don't perform any action until it is safe to execute all
3715  * actions.
3716  */
3717 bool
handleLCPSurfacing(Signal * signal)3718 Dblqh::handleLCPSurfacing(Signal *signal)
3719 {
3720   if (!c_wait_lcp_surfacing)
3721   {
3722     jam();
3723     return false;
3724   }
3725   jam();
3726   c_wait_lcp_surfacing = false;
3727 
3728   DEB_LCP(("(%u)LCP surfaced, continue", instance()));
3729   AlterTabReq *req = (AlterTabReq*)signal->getDataPtr();
3730   *req = c_keep_alter_tab_req;
3731   const Uint32 tableId = req->tableId;
3732   const Uint32 newTableVersion = req->newTableVersion;
3733   const Uint32 senderRef = req->senderRef;
3734   const Uint32 senderData = req->senderData;
3735 
3736   TablerecPtr tablePtr;
3737   tablePtr.i = tableId;
3738   ptrCheckGuard(tablePtr, ctabrecFileSize, tablerec);
3739 
3740   tablePtr.p->schemaVersion = newTableVersion;
3741   DEB_SCHEMA_VERSION(("(%u)tab(%u): %u tableStatus = %u",
3742                       instance(),
3743                       tablePtr.p->schemaVersion,
3744                       tablePtr.i,
3745                       tablePtr.p->tableStatus));
3746   if (AlterTableReq::getReorgFragFlag(req->changeMask))
3747   {
3748     jam();
3749     commit_reorg(tablePtr);
3750   }
3751   Uint32 len = c_keep_alter_tab_req_len;
3752   EXECUTE_DIRECT(DBTUP, GSN_ALTER_TAB_REQ, signal, len);
3753   jamEntry();
3754 
3755   Uint32 errCode = signal->theData[0];
3756   Uint32 connectPtr = signal->theData[1];
3757   ndbrequire(errCode == 0);
3758 
3759   AlterTabConf* conf = (AlterTabConf*)signal->getDataPtrSend();
3760   conf->senderRef = reference();
3761   conf->senderData = senderData;
3762   conf->connectPtr = connectPtr;
3763   sendSignal(senderRef, GSN_ALTER_TAB_CONF, signal,
3764              AlterTabConf::SignalLength, JBB);
3765   return true;
3766 }
3767 
3768 void
execALTER_TAB_REQ(Signal * signal)3769 Dblqh::execALTER_TAB_REQ(Signal* signal)
3770 {
3771   jamEntry();
3772 
3773   if(!assembleFragments(signal))
3774     return;
3775 
3776   AlterTabReq copy = *(AlterTabReq*)signal->getDataPtr();
3777   const AlterTabReq* req = &copy;
3778   const Uint32 senderRef = req->senderRef;
3779   const Uint32 senderData = req->senderData;
3780   const Uint32 tableId = req->tableId;
3781   const Uint32 tableVersion = req->tableVersion;
3782   const Uint32 newTableVersion = req->newTableVersion;
3783   AlterTabReq::RequestType requestType =
3784     (AlterTabReq::RequestType) req->requestType;
3785 
3786   TablerecPtr tablePtr;
3787   tablePtr.i = tableId;
3788   ptrCheckGuard(tablePtr, ctabrecFileSize, tablerec);
3789 
3790   D("ALTER_TAB_REQ(LQH): requestType: " << requestType);
3791   Uint32 len = signal->getLength();
3792   switch (requestType) {
3793   case AlterTabReq::AlterTablePrepare:
3794     jam();
3795     break;
3796   case AlterTabReq::AlterTableRevert:
3797     jam();
3798     tablePtr.p->schemaVersion = tableVersion;
3799     DEB_SCHEMA_VERSION(("(%u)tab(%u): %u tableStatus = %u (2)",
3800                         instance(),
3801                         tablePtr.p->schemaVersion,
3802                         tablePtr.i,
3803                         tablePtr.p->tableStatus));
3804     break;
3805   case AlterTabReq::AlterTableCommit:
3806     jam();
3807     if (AlterTableReq::getAddAttrFlag(req->changeMask))
3808     {
3809       jam();
3810       lcpPtr.i = 0;
3811       ptrAss(lcpPtr, lcpRecord);
3812       if (lcpPtr.p->lcpRunState == LcpRecord::LCP_CHECKPOINTING &&
3813           tableId == lcpPtr.p->currentRunFragment.lcpFragOrd.tableId)
3814       {
3815         jam();
3816         /* See comment above on handleLCPSurfacing */
3817         DEB_LCP(("(%u)Wait for LCP to surface again", instance()));
3818         ndbrequire(!c_wait_lcp_surfacing);
3819         c_wait_lcp_surfacing = true;
3820         c_keep_alter_tab_req = copy;
3821         c_keep_alter_tab_req_len = len;
3822         return;
3823       }
3824     }
3825     tablePtr.p->schemaVersion = newTableVersion;
3826     DEB_SCHEMA_VERSION(("(%u)tab(%u): %u tableStatus = %u (3)",
3827                         instance(),
3828                         tablePtr.p->schemaVersion,
3829                         tablePtr.i,
3830                         tablePtr.p->tableStatus));
3831     if (AlterTableReq::getReorgFragFlag(req->changeMask))
3832     {
3833       jam();
3834       commit_reorg(tablePtr);
3835     }
3836     break;
3837   case AlterTabReq::AlterTableComplete:
3838     jam();
3839     break;
3840   case AlterTabReq::AlterTableSumaEnable:
3841     jam();
3842     break;
3843   case AlterTabReq::AlterTableSumaFilter:
3844     jam();
3845     signal->theData[len++] = cnewestGci + 3;
3846     break;
3847   case AlterTabReq::AlterTableReadOnly:
3848     jam();
3849     ndbrequire(tablePtr.p->tableStatus == Tablerec::TABLE_DEFINED);
3850     tablePtr.p->tableStatus = Tablerec::TABLE_READ_ONLY;
3851     DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = TABLE_READ_ONLY",
3852                         instance(),
3853                         tablePtr.i));
3854     signal->theData[0] = ZWAIT_READONLY;
3855     signal->theData[1] = tablePtr.i;
3856     signal->theData[2] = senderRef;
3857     signal->theData[3] = senderData;
3858     sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
3859     return;
3860   case AlterTabReq::AlterTableReadWrite:
3861     jam();
3862     ndbrequire(tablePtr.p->tableStatus == Tablerec::TABLE_READ_ONLY);
3863     tablePtr.p->tableStatus = Tablerec::TABLE_DEFINED;
3864     DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = TABLE_DEFINED(2)",
3865                         instance(),
3866                         tablePtr.i));
3867     break;
3868   default:
3869     ndbabort();
3870   }
3871 
3872   EXECUTE_DIRECT(DBTUP, GSN_ALTER_TAB_REQ, signal, len);
3873   jamEntry();
3874 
3875   Uint32 errCode = signal->theData[0];
3876   Uint32 connectPtr = signal->theData[1];
3877   if (errCode == 0)
3878   {
3879     // Request handled successfully
3880     AlterTabConf* conf = (AlterTabConf*)signal->getDataPtrSend();
3881     conf->senderRef = reference();
3882     conf->senderData = senderData;
3883     conf->connectPtr = connectPtr;
3884     sendSignal(senderRef, GSN_ALTER_TAB_CONF, signal,
3885                AlterTabConf::SignalLength, JBB);
3886   }
3887   else if (errCode == ~Uint32(0))
3888   {
3889     /**
3890      * Wait
3891      */
3892     ndbrequire(requestType == AlterTabReq::AlterTableSumaFilter);
3893     signal->theData[0] = ZWAIT_REORG_SUMA_FILTER_ENABLED;
3894     signal->theData[1] = cnewestGci + 3;
3895     signal->theData[2] = senderData;
3896     signal->theData[3] = connectPtr;
3897     signal->theData[4] = senderRef;
3898     wait_reorg_suma_filter_enabled(signal);
3899     return;
3900   }
3901   else
3902   {
3903     jam();
3904     AlterTabRef* ref = (AlterTabRef*)signal->getDataPtrSend();
3905     ref->senderRef = reference();
3906     ref->senderData = senderData;
3907     ref->connectPtr = connectPtr;
3908     ref->errorCode = errCode;
3909     sendSignal(senderRef, GSN_ALTER_TAB_REF, signal,
3910                AlterTabRef::SignalLength, JBB);
3911   }
3912 }
3913 
3914 void
wait_reorg_suma_filter_enabled(Signal * signal)3915 Dblqh::wait_reorg_suma_filter_enabled(Signal* signal)
3916 {
3917   if (cnewestCompletedGci >= signal->theData[1])
3918   {
3919     jam();
3920     Uint32 senderData = signal->theData[2];
3921     Uint32 connectPtr = signal->theData[3];
3922     Uint32 senderRef = signal->theData[4];
3923 
3924     D("ALTER_TAB_CONF after suma filter enabled");
3925     AlterTabConf* conf = (AlterTabConf*)signal->getDataPtrSend();
3926     conf->senderRef = reference();
3927     conf->senderData = senderData;
3928     conf->connectPtr = connectPtr;
3929     sendSignal(senderRef, GSN_ALTER_TAB_CONF, signal,
3930                AlterTabConf::SignalLength, JBB);
3931     return;
3932   }
3933   sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 500, 5);
3934 }
3935 
3936 void
commit_reorg(TablerecPtr tablePtr)3937 Dblqh::commit_reorg(TablerecPtr tablePtr)
3938 {
3939   for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tablePtr.p->fragrec); i++)
3940   {
3941     jam();
3942     Ptr<Fragrecord> fragPtr;
3943     if ((fragPtr.i = tablePtr.p->fragrec[i]) != RNIL)
3944     {
3945       jam();
3946       c_fragment_pool.getPtr(fragPtr);
3947       fragPtr.p->fragDistributionKey = (fragPtr.p->fragDistributionKey+1)&0xFF;
3948     }
3949   }
3950 }
3951 
3952 void
wait_readonly(Signal * signal)3953 Dblqh::wait_readonly(Signal* signal)
3954 {
3955   jam();
3956 
3957   Uint32 tableId = signal->theData[1];
3958 
3959   TablerecPtr tablePtr;
3960   tablePtr.i = tableId;
3961   ptrCheckGuard(tablePtr, ctabrecFileSize, tablerec);
3962   ndbrequire(tablePtr.p->tableStatus == Tablerec::TABLE_READ_ONLY);
3963 
3964   if (tablePtr.p->usageCountW > 0)
3965   {
3966     jam();
3967     sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 3000,
3968                         signal->getLength());
3969     return;
3970   }
3971 
3972   Uint32 senderRef = signal->theData[2];
3973   Uint32 senderData = signal->theData[3];
3974 
3975   // Request handled successfully
3976   AlterTabConf * conf = (AlterTabConf*)signal->getDataPtrSend();
3977   conf->senderRef = reference();
3978   conf->senderData = senderData;
3979   sendSignal(senderRef, GSN_ALTER_TAB_CONF, signal,
3980 	     AlterTabConf::SignalLength, JBB);
3981 }
3982 
/* ************************************************************************>>
 * TIME_SIGNAL: Handles time-out of local operations. This is a clean-up
 * handler. If no other measure has succeeded in cleaning up after a
 * time-out, this routine will remove the transaction after 120 seconds of
 * inactivity. The check is performed once per 40 milliseconds.
 * ************************************************************************>> */
3989 #define LQH_TIME_SIGNAL_DELAY 40
execTIME_SIGNAL(Signal * signal)3990 void Dblqh::execTIME_SIGNAL(Signal* signal)
3991 {
3992   jamEntry();
3993 
3994   const NDB_TICKS currentTime = NdbTick_getCurrentTicks();
3995   Uint64 num_ms_elapsed = elapsed_time(signal,
3996                                        currentTime,
3997                                        c_latestTIME_SIGNAL,
3998                                        Uint32(LQH_TIME_SIGNAL_DELAY));
3999   sendTIME_SIGNAL(signal, currentTime, Uint32(LQH_TIME_SIGNAL_DELAY));
4000 
4001   /**
4002    * timer_handling will effectively call tick in the IOTracker and
4003    * this in turn will only do something every 128ms, so this means
4004    * that with a maximum of 8 runs per execTIME_SIGNAL we can at most
4005    * have one call to tick that actually does something useful.
4006    * We avoid using the same delay as in DBTC to avoid running the
4007    * delay handling code at the same time.
4008    *
4009    * The idea of calling timer_handling logically once per 10ms is
4010    * a remnant from when TIME_SIGNAL was generated by QMGR, this could
4011    * be changed in the future.
4012    */
4013   c_elapsed_time_millis += num_ms_elapsed;
4014   while (c_elapsed_time_millis > Uint64(10))
4015   {
4016     jam();
4017     c_elapsed_time_millis -= Uint64(10);
4018     timer_handling(signal);
4019   }
4020 }
4021 
4022 bool
getNextTcConRec(Uint32 & next,TcConnectionrecPtr & tcConnectptr,Uint32 max_loops)4023 Dblqh::getNextTcConRec(Uint32 &next,
4024                        TcConnectionrecPtr &tcConnectptr,
4025                        Uint32 max_loops)
4026 {
4027   Uint32 found = 0;
4028   Uint32 loop_count = 0;
4029   while (found == 0 && next != RNIL &&
4030          (max_loops == 0 || loop_count < max_loops))
4031   {
4032     found = tcConnect_pool.getUncheckedPtrs(&next, &tcConnectptr, 1);
4033     if (found > 0 &&
4034         !Magic::check_ptr(tcConnectptr.p))
4035       found = 0;
4036     loop_count++;
4037   }
4038   return (found > 0);
4039 }
4040 
4041 bool
getNextScanRec(Uint32 & next,ScanRecordPtr & loc_scanptr,Uint32 max_loops)4042 Dblqh::getNextScanRec(Uint32 &next,
4043                       ScanRecordPtr &loc_scanptr,
4044                       Uint32 max_loops)
4045 {
4046   Uint32 found = 0;
4047   Uint32 loop_count = 0;
4048   while (found == 0 && next != RNIL &&
4049          (max_loops == 0 || loop_count < max_loops))
4050   {
4051     found = c_scanRecordPool.getUncheckedPtrs(&next, &loc_scanptr, 1);
4052     if (found > 0 &&
4053         !Magic::check_ptr(loc_scanptr.p))
4054       found = 0;
4055     loop_count++;
4056   }
4057   return (found > 0);
4058 }
4059 
4060 bool
getNextCommitAckMarker(Uint32 & next,CommitAckMarkerPtr & commitAckMarkerPtr,Uint32 max_loops)4061 Dblqh::getNextCommitAckMarker(Uint32 &next,
4062                               CommitAckMarkerPtr &commitAckMarkerPtr,
4063                               Uint32 max_loops)
4064 {
4065   Uint32 found = 0;
4066   Uint32 loop_count = 0;
4067   while (found == 0 && next != RNIL &&
4068          (max_loops == 0 || loop_count < max_loops))
4069   {
4070     found = m_commitAckMarkerPool.getUncheckedPtrs(&next,
4071                                                    &commitAckMarkerPtr,
4072                                                    1);
4073     if (found > 0 &&
4074         !Magic::check_ptr(commitAckMarkerPtr.p))
4075       found = 0;
4076     loop_count++;
4077   }
4078   return (found > 0);
4079 }
4080 
timer_handling(Signal * signal)4081 void Dblqh::timer_handling(Signal *signal)
4082 {
4083   cLqhTimeOutCount++;
4084   cLqhTimeOutCheckCount++;
4085 
4086   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++)
4087   {
4088     jam();
4089     ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
4090     int ret = logPartPtr.p->m_io_tracker.tick(10 * cLqhTimeOutCount,
4091                                               c_max_redo_lag,
4092                                               c_max_redo_lag_counter);
4093     if (ret < 0)
4094     {
4095       /**
4096        * set problem
4097        */
4098       update_log_problem(signal, logPartPtr,
4099                          LogPartRecord::P_REDO_IO_PROBLEM, true);
4100     }
4101     else if (ret > 0)
4102     {
4103       /**
4104        * clear
4105        */
4106       update_log_problem(signal, logPartPtr,
4107                          LogPartRecord::P_REDO_IO_PROBLEM, false);
4108     }
4109   }
4110 
4111   if (cLqhTimeOutCheckCount < 1000) {
4112     jam();
4113     return;
4114   }//if
4115 
4116   cLqhTimeOutCheckCount = 0;
4117 #ifdef DEBUG_TRANSACTION_TIMEOUT
4118 #ifdef VM_TRACE
4119   TcConnectionrecPtr tTcConptr;
4120   Uint32 next = 0;
4121   do
4122   {
4123     bool found = getNextTcConRec(next, tTcConptr, 0);
4124     if (found &&
4125         (tTcConptr.p->tcTimer != 0) &&
4126 	((tTcConptr.p->tcTimer + 12000) < cLqhTimeOutCount))
4127     {
4128       ndbout << "Dblqh::execTIME_SIGNAL"<<endl
4129 	     << "Timeout found in tcConnectRecord " <<tTcConptr.i<<endl
4130 	     << " cLqhTimeOutCount = " << cLqhTimeOutCount << endl
4131 	     << " tcTimer="<<tTcConptr.p->tcTimer<<endl
4132 	     << " tcTimer+12000="<<tTcConptr.p->tcTimer + 12000<<endl;
4133 
4134       signal->theData[0] = DumpStateOrd::LqhDumpAllTcRec;
4135       signal->theData[1] = tTcConptr.i;
4136       execDUMP_STATE_ORD(signal);
4137 
4138       // Reset the timer
4139       tTcConptr.p->tcTimer = 0;
4140     }//if
4141   } while (next != RNIL);
4142 #endif
4143 #ifdef VM_TRACE
4144   for (lfoPtr.i = 0; lfoPtr.i < clfoFileSize; lfoPtr.i++) {
4145     ptrAss(lfoPtr, logFileOperationRecord);
4146     if ((lfoPtr.p->lfoTimer != 0) &&
4147         ((lfoPtr.p->lfoTimer + 12000) < cLqhTimeOutCount)) {
4148       ndbout << "We have lost LFO record" << endl;
4149       ndbout << "index = " << lfoPtr.i;
4150       ndbout << "State = " << lfoPtr.p->lfoState;
4151       ndbout << " Page No = " << lfoPtr.p->lfoPageNo;
4152       ndbout << " noPagesRw = " << lfoPtr.p->noPagesRw;
4153       ndbout << "lfoWordWritten = " << lfoPtr.p->lfoWordWritten << endl;
4154       lfoPtr.p->lfoTimer = cLqhTimeOutCount;
4155     }//if
4156   }//for
4157 
4158 #endif
4159 #endif
4160 #if 0
4161   LcpRecordPtr TlcpPtr;
4162   // Print information about the current local checkpoint
4163   TlcpPtr.i = 0;
4164   ptrAss(TlcpPtr, lcpRecord);
4165   ndbout << "Information about LCP in this LQH" << endl
4166 	 << "  lcpState=" << TlcpPtr.p->lcpState << endl
4167 	 << "   firstLcpLocAcc=" << TlcpPtr.p->firstLcpLocAcc << endl
4168 	 << "   firstLcpLocTup=" << TlcpPtr.p->firstLcpLocTup << endl
4169 	 << "   lcpAccptr=" << TlcpPtr.p->lcpAccptr << endl
4170 	 << "   lastFragmentFlag=" << TlcpPtr.p->lastFragmentFlag << endl
4171 #endif
4172 }//Dblqh::execTIME_SIGNAL()
4173 
4174 /* ######################################################################### */
4175 /* #######                  EXECUTION MODULE                         ####### */
4176 /* THIS MODULE HANDLES THE RECEPTION OF LQHKEYREQ AND ALL PROCESSING         */
4177 /* OF OPERATIONS ON BEHALF OF THIS REQUEST. THIS DOES ALSO INVOLVE           */
4178 /* RECEPTION OF VARIOUS TYPES OF ATTRINFO AND KEYINFO. IT DOES ALSO          */
4179 /* INVOLVE COMMUNICATION WITH ACC AND TUP.                                   */
4180 /* ######################################################################### */
4181 
4182 /**
4183  * earlyKeyReqAbort
4184  *
4185  * Exit early from handling an LQHKEYREQ request.
4186  * Method determines which resources (if any) need freed, then
4187  * signals requestor with error response.
4188  * * Verify all required resources are freed if adding new callers *
4189  */
earlyKeyReqAbort(Signal * signal,const LqhKeyReq * lqhKeyReq,bool isLongReq,Uint32 errCode,const TcConnectionrecPtr tcConnectptr)4190 void Dblqh::earlyKeyReqAbort(Signal* signal,
4191                              const LqhKeyReq * lqhKeyReq,
4192                              bool isLongReq,
4193                              Uint32 errCode,
4194                              const TcConnectionrecPtr tcConnectptr)
4195 {
4196   jamEntry();
4197   const Uint32 transid1  = lqhKeyReq->transId1;
4198   const Uint32 transid2  = lqhKeyReq->transId2;
4199   Uint32 treqInfo        = lqhKeyReq->requestInfo;
4200 
4201   if (!isLongReq)
4202   {
4203     jam();
4204     /* The inlined AI length does not matter here.  Zero it to avoid
4205      * interpretation as 7.x bits (getNormalProtocolFlag).
4206      * bug#14702377
4207      */
4208     LqhKeyReq::clearAIInLqhKeyReq(treqInfo);
4209   }
4210   const Uint32 reqInfo   = treqInfo;
4211 
4212   bool tcConnectRecAllocated = (tcConnectptr.i != RNIL);
4213 
4214   if (tcConnectRecAllocated)
4215   {
4216     jam();
4217 
4218     /* Could have a commit-ack marker allocated. */
4219     remove_commit_marker(tcConnectptr.p);
4220 
4221     /* Could have long key/attr sections linked */
4222     ndbrequire(tcConnectptr.p->m_dealloc_state == TcConnectionrec::DA_IDLE);
4223     ndbrequire(tcConnectptr.p->m_dealloc_data.m_unused == RNIL);
4224     releaseOprec(signal, tcConnectptr);
4225 
4226     /*
4227      * Free the TcConnectRecord, ensuring that the
4228      * table reference counts have not been incremented and
4229      * so will not be decremented.
4230      * Also verify that we're not present in the transid
4231      * hash
4232      */
4233     ndbrequire(tcConnectptr.p->tableref == RNIL);
4234     /* Following is not 100% check, but a reasonable guard */
4235     ndbrequire(tcConnectptr.p->nextHashRec == RNIL);
4236     ndbrequire(tcConnectptr.p->prevHashRec == RNIL);
4237     releaseTcrec(signal, tcConnectptr);
4238   }
4239 
4240   /* Now perform signalling */
4241 
4242   if (LqhKeyReq::getDirtyFlag(reqInfo) &&
4243       LqhKeyReq::getOperation(reqInfo) == ZREAD &&
4244       !LqhKeyReq::getNormalProtocolFlag(reqInfo)){
4245     jam();
4246     /* Dirty read sends TCKEYREF direct to client, and nothing to TC */
4247     ndbrequire(LqhKeyReq::getApplicationAddressFlag(reqInfo));
4248     const Uint32 apiRef   = lqhKeyReq->variableData[0];
4249     const Uint32 apiOpRec = lqhKeyReq->variableData[1];
4250 
4251     TcKeyRef * const tcKeyRef = (TcKeyRef *) signal->getDataPtrSend();
4252 
4253     tcKeyRef->connectPtr = apiOpRec;
4254     tcKeyRef->transId[0] = transid1;
4255     tcKeyRef->transId[1] = transid2;
4256     tcKeyRef->errorCode = errCode;
4257     sendTCKEYREF(signal, apiRef, lqhKeyReq->tcBlockref, 0);
4258   } else {
4259     jam();
4260     /* All ops apart from dirty read send LQHKEYREF to TC
4261      * (This includes simple read)
4262      */
4263 
4264     const Uint32 clientPtr = lqhKeyReq->clientConnectPtr;
4265     Uint32 TcOprec = clientPtr;
4266     if(LqhKeyReq::getSameClientAndTcFlag(reqInfo) == 1){
4267       if(LqhKeyReq::getApplicationAddressFlag(reqInfo))
4268 	TcOprec = lqhKeyReq->variableData[2];
4269       else
4270 	TcOprec = lqhKeyReq->variableData[0];
4271     }
4272 
4273     LqhKeyRef * const ref = (LqhKeyRef*)signal->getDataPtrSend();
4274     ref->userRef = clientPtr;
4275     ref->connectPtr = TcOprec;
4276     ref->errorCode = errCode;
4277     ref->transId1 = transid1;
4278     ref->transId2 = transid2;
4279     Uint32 block = refToMain(signal->senderBlockRef());
4280     if (block != RESTORE)
4281     {
4282       sendSignal(signal->senderBlockRef(), GSN_LQHKEYREF, signal,
4283 	         LqhKeyRef::SignalLength, JBB);
4284     }
4285     else
4286     {
4287       ndbrequire(refToNode(signal->senderBlockRef()) == cownNodeid &&
4288                  refToInstance(signal->senderBlockRef()) == instance());
4289       EXECUTE_DIRECT(RESTORE, GSN_LQHKEYREF,
4290                      signal, LqhKeyRef::SignalLength);
4291     }
4292   }//if
4293   return;
4294 }//Dblqh::earlyKeyReqAbort()
4295 
4296 Uint32
get_table_state_error(Ptr<Tablerec> tabPtr) const4297 Dblqh::get_table_state_error(Ptr<Tablerec> tabPtr) const
4298 {
4299   switch(tabPtr.p->tableStatus){
4300   case Tablerec::NOT_DEFINED:
4301     jam();
4302     return ZTABLE_NOT_DEFINED;
4303     break;
4304   case Tablerec::ADD_TABLE_ONGOING:
4305     jam();
4306     return ZDROP_TABLE_IN_PROGRESS;
4307   case Tablerec::PREP_DROP_TABLE_DONE:
4308     jam();
4309     return ZDROP_TABLE_IN_PROGRESS;
4310   case Tablerec::DROP_TABLE_WAIT_USAGE:
4311     jam();
4312     return ZDROP_TABLE_IN_PROGRESS;
4313   case Tablerec::DROP_TABLE_WAIT_DONE:
4314     jam();
4315     return ZDROP_TABLE_IN_PROGRESS;
4316   case Tablerec::DROP_TABLE_ACC:
4317     jam();
4318     return ZDROP_TABLE_IN_PROGRESS;
4319   case Tablerec::DROP_TABLE_TUP:
4320     jam();
4321     return ZDROP_TABLE_IN_PROGRESS;
4322   case Tablerec::DROP_TABLE_TUX:
4323     jam();
4324     return ZDROP_TABLE_IN_PROGRESS;
4325   case Tablerec::TABLE_DEFINED:
4326   case Tablerec::TABLE_READ_ONLY:
4327     ndbabort();
4328     return ZTABLE_NOT_DEFINED;
4329   }
4330   ndbabort();
4331   return ~Uint32(0);
4332 }
4333 
4334 int
check_tabstate(Signal * signal,const Tablerec * tablePtrP,Uint32 op,const TcConnectionrecPtr tcConnectptr)4335 Dblqh::check_tabstate(Signal * signal,
4336                       const Tablerec * tablePtrP,
4337                       Uint32 op,
4338                       const TcConnectionrecPtr tcConnectptr)
4339 {
4340   if (tabptr.p->tableStatus == Tablerec::TABLE_READ_ONLY)
4341   {
4342     jam();
4343     if (op == ZREAD || op == ZREAD_EX || op == ZUNLOCK)
4344     {
4345       jam();
4346       return 0;
4347     }
4348     terrorCode = ZTABLE_READ_ONLY;
4349   }
4350   else
4351   {
4352     jam();
4353     terrorCode = get_table_state_error(tabptr);
4354   }
4355   abortErrorLab(signal, tcConnectptr);
4356   return 1;
4357 }
4358 
LQHKEY_abort(Signal * signal,int errortype,const TcConnectionrecPtr tcConnectptr)4359 void Dblqh::LQHKEY_abort(Signal* signal,
4360                          int errortype,
4361                          const TcConnectionrecPtr tcConnectptr)
4362 {
4363   switch (errortype) {
4364   case 0:
4365     jam();
4366     terrorCode = ZCOPY_NODE_ERROR;
4367     break;
4368   case 1:
4369     jam();
4370     terrorCode = ZNO_FREE_LQH_CONNECTION;
4371     break;
4372   case 2:
4373     jam();
4374     terrorCode = signal->theData[1];
4375     break;
4376   case 3:
4377     jam();
4378     ndbrequire((tcConnectptr.p->transactionState == TcConnectionrec::WAIT_ACC_ABORT) ||
4379                (tcConnectptr.p->transactionState == TcConnectionrec::ABORT_QUEUED));
4380     return;
4381     break;
4382   case 4:
4383     jam();
4384     terrorCode = get_table_state_error(tabptr);
4385     break;
4386   case 5:
4387     jam();
4388     terrorCode = ZINVALID_SCHEMA_VERSION;
4389     break;
4390   default:
4391     ndbabort();
4392   }//switch
4393   abortErrorLab(signal, tcConnectptr);
4394 }//Dblqh::LQHKEY_abort()
4395 
LQHKEY_error(Signal * signal,int errortype)4396 void Dblqh::LQHKEY_error(Signal* signal, int errortype)
4397 {
4398   switch (errortype) {
4399   case 0:
4400     jam();
4401     break;
4402   case 1:
4403     jam();
4404     break;
4405   case 2:
4406     jam();
4407     break;
4408   case 3:
4409     jam();
4410     break;
4411   case 4:
4412     jam();
4413     break;
4414   case 5:
4415     jam();
4416     break;
4417   case 6:
4418     jam();
4419     break;
4420   default:
4421     jam();
4422     break;
4423   }//switch
4424   ndbabort();
4425 }//Dblqh::LQHKEY_error()
4426 
execLQHKEYREF(Signal * signal)4427 void Dblqh::execLQHKEYREF(Signal* signal)
4428 {
4429   jamEntry();
4430   TcConnectionrecPtr tcConnectptr;
4431   tcConnectptr.i = signal->theData[0];
4432   Uint32 tcOprec  = signal->theData[1];
4433   terrorCode = signal->theData[2];
4434   Uint32 transid1 = signal->theData[3];
4435   Uint32 transid2 = signal->theData[4];
4436   if (!tcConnect_pool.getValidPtr(tcConnectptr))
4437   {
4438     jam();
4439     warningReport(signal, 13);
4440     return;
4441   }//if
4442   m_tc_connect_ptr = tcConnectptr;
4443   TcConnectionrec * const regTcPtr = tcConnectptr.p;
4444 
4445   if (likely(! ((regTcPtr->connectState == TcConnectionrec::LOG_CONNECTED) ||
4446                 (regTcPtr->connectState == TcConnectionrec::COPY_CONNECTED))))
4447   {
4448     /**
4449      * This...is unpleasant...
4450      *   LOG_CONNECTED and COPY_CONNECTED will not release there tcConnectptr
4451      *   before all outstanding is finished.
4452      *
4453      *   CONNECTED on the other hand can, (in ::execABORT)
4454      *     which means that findTransaction *should* be used
4455      *     to make sure that correct tcConnectptr is accessed.
4456      *
4457      *   However, as LOG_CONNECTED & COPY_CONNECTED only uses 1 tcConnectptr
4458      *     (and fiddles) with transid and other stuff, I could
4459      *     not find an easy way to modify the code so that findTransaction
4460      *     is usable also for them
4461      */
4462     if (findTransaction(transid1, transid2, tcOprec, 0, tcConnectptr) != ZOK)
4463     {
4464       jam();
4465       warningReport(signal, 14);
4466       return;
4467     }
4468   }
4469 
4470   switch (regTcPtr->connectState) {
4471   case TcConnectionrec::CONNECTED:
4472     jam();
4473     if (regTcPtr->abortState != TcConnectionrec::ABORT_IDLE) {
4474       warningReport(signal, 15);
4475       return;
4476     }//if
4477     abortErrorLab(signal, tcConnectptr);
4478     return;
4479   case TcConnectionrec::LOG_CONNECTED:
4480     jam();
4481     logLqhkeyrefLab(signal, tcConnectptr);
4482     return;
4483   case TcConnectionrec::COPY_CONNECTED:
4484     jam();
4485     setup_scan_pointers_from_tc_con(tcConnectptr);
4486     copyLqhKeyRefLab(signal, tcConnectptr);
4487     return;
4488   default:
4489     warningReport(signal, 16);
4490     return;
4491   }//switch
4492 }//Dblqh::execLQHKEYREF()
4493 
4494 /* -------------------------------------------------------------------------- */
4495 /* -------                       ENTER PACKED_SIGNAL                  ------- */
4496 /* Execution of packed signal. The packed signal can contain COMMIT, COMPLETE */
4497 /* or LQHKEYCONF signals. These signals will be executed by their resp. exec  */
4498 /* functions.                                                                 */
4499 /* -------------------------------------------------------------------------- */
execPACKED_SIGNAL(Signal * signal)4500 void Dblqh::execPACKED_SIGNAL(Signal* signal)
4501 {
4502   Uint32 Tstep = 0;
4503   Uint32 Tlength;
4504   Uint32 TpackedData[28];
4505   Uint32 sig0, sig1, sig2, sig3 ,sig4, sig5, sig6;
4506 
4507   jamEntry();
4508   Tlength = signal->length();
4509   Uint32 TsenderRef = signal->getSendersBlockRef();
4510   Uint32 TcommitLen = 5;
4511   Uint32 Tgci_lo_mask = ~(Uint32)0;
4512 
4513 #ifdef ERROR_INSERT
4514   Uint32 senderBlockRef = signal->getSendersBlockRef();
4515 #endif
4516 
4517   ndbrequire(Tlength <= 25);
4518   MEMCOPY_NO_WORDS(&TpackedData[0], &signal->theData[0], Tlength);
4519 
4520   if (VERIFY_PACKED_RECEIVE)
4521   {
4522     ndbrequire(PackedSignal::verify(&TpackedData[0],
4523                                     Tlength,
4524                                     cownref,
4525                                     LQH_RECEIVE_TYPES,
4526                                     TcommitLen));
4527   }
4528 
4529   while (Tlength > Tstep) {
4530     switch (TpackedData[Tstep] >> 28) {
4531     case ZCOMMIT:
4532       jam();
4533       sig0 = TpackedData[Tstep + 0] & 0x0FFFFFFF;
4534       sig1 = TpackedData[Tstep + 1];
4535       sig2 = TpackedData[Tstep + 2];
4536       sig3 = TpackedData[Tstep + 3];
4537       sig4 = TpackedData[Tstep + 4];
4538       signal->theData[0] = sig0;
4539       signal->theData[1] = sig1;
4540       signal->theData[2] = sig2;
4541       signal->theData[3] = sig3;
4542       signal->theData[4] = sig4 & Tgci_lo_mask;
4543       signal->header.theLength = TcommitLen;
4544       jamBuffer()->markEndOfSigExec();
4545       execCOMMIT(signal);
4546       Tstep += TcommitLen;
4547       break;
4548     case ZCOMPLETE:
4549       jam();
4550       sig0 = TpackedData[Tstep + 0] & 0x0FFFFFFF;
4551       sig1 = TpackedData[Tstep + 1];
4552       sig2 = TpackedData[Tstep + 2];
4553       signal->theData[0] = sig0;
4554       signal->theData[1] = sig1;
4555       signal->theData[2] = sig2;
4556       signal->header.theLength = 3;
4557       jamBuffer()->markEndOfSigExec();
4558       execCOMPLETE(signal);
4559       Tstep += 3;
4560       break;
4561     case ZLQHKEYCONF: {
4562       jam();
4563       LqhKeyConf * lqhKeyConf = CAST_PTR(LqhKeyConf, signal->theData);
4564       sig0 = TpackedData[Tstep + 0] & 0x0FFFFFFF;
4565       sig1 = TpackedData[Tstep + 1];
4566       sig2 = TpackedData[Tstep + 2];
4567       sig3 = TpackedData[Tstep + 3];
4568       sig4 = TpackedData[Tstep + 4];
4569       sig5 = TpackedData[Tstep + 5];
4570       sig6 = TpackedData[Tstep + 6];
4571       lqhKeyConf->connectPtr = sig0;
4572       lqhKeyConf->opPtr = sig1;
4573       lqhKeyConf->userRef = sig2;
4574       lqhKeyConf->readLen = sig3;
4575       lqhKeyConf->transId1 = sig4;
4576       lqhKeyConf->transId2 = sig5;
4577       lqhKeyConf->numFiredTriggers = sig6;
4578       jamBuffer()->markEndOfSigExec();
4579       execLQHKEYCONF(signal);
4580       Tstep += LqhKeyConf::SignalLength;
4581       break;
4582     }
4583     case ZREMOVE_MARKER:
4584       jam();
4585       sig0 = TpackedData[Tstep + 1];
4586       sig1 = TpackedData[Tstep + 2];
4587       signal->theData[0] = sig0;
4588       signal->theData[1] = sig1;
4589       if ((TpackedData[Tstep] & 1) == 0)
4590       {
4591         /**
4592          * This is the normal path where we remove a marker
4593          * after commit.
4594          */
4595         signal->header.theLength = 2;
4596       }
4597       else
4598       {
4599         /**
4600          * This is a new path that is used when removing a marker
4601          * after an API node failure. We indicate this in packed
4602          * signal by setting one of the 28 unused bits in the
4603          * packed signal (the first word only uses the last 4 bits
4604          * in the first 32-bit word.
4605          *
4606          * We indicate this to the execREMOVE_MARKER_ORD method
4607          * by setting the Length of the signal to 3 (we cannot
4608          * add an extra parameter since the signal can be sent
4609          * directly and not through the packed signal interface.
4610          */
4611         signal->header.theLength = 3;
4612       }
4613       jamBuffer()->markEndOfSigExec();
4614       execREMOVE_MARKER_ORD(signal);
4615       Tstep += 3;
4616       break;
4617     case ZFIRE_TRIG_REQ:
4618       jam();
4619       ndbassert(FireTrigReq::SignalLength == 4);
4620       sig0 = TpackedData[Tstep + 0] & 0x0FFFFFFF;
4621       sig1 = TpackedData[Tstep + 1];
4622       sig2 = TpackedData[Tstep + 2];
4623       sig3 = TpackedData[Tstep + 3];
4624       signal->theData[0] = sig0;
4625       signal->theData[1] = sig1;
4626       signal->theData[2] = sig2;
4627       signal->theData[3] = sig3;
4628       signal->header.theLength = FireTrigReq::SignalLength;
4629       signal->header.theSendersBlockRef = TsenderRef;
4630       jamBuffer()->markEndOfSigExec();
4631       execFIRE_TRIG_REQ(signal);
4632       Tstep += FireTrigReq::SignalLength;
4633       break;
4634     default:
4635       ndbabort();
4636       return;
4637     }//switch
4638 #ifdef ERROR_INSERT
4639     signal->header.theSendersBlockRef = senderBlockRef;
4640 #endif
4641   }//while
4642   ndbrequire(Tlength == Tstep);
4643   return;
4644 }//Dblqh::execPACKED_SIGNAL()
4645 
4646 void
execREMOVE_MARKER_ORD(Signal * signal)4647 Dblqh::execREMOVE_MARKER_ORD(Signal* signal)
4648 {
4649   CommitAckMarker key;
4650   key.transid1 = signal->theData[0];
4651   key.transid2 = signal->theData[1];
4652   bool removed_by_fail_api = (signal->header.theLength == 3);
4653   jamEntry();
4654 
4655   CommitAckMarkerPtr removedPtr;
4656   m_commitAckMarkerHash.remove(removedPtr, key);
4657   if (removedPtr.i != RNIL)
4658   {
4659     jam();
4660     ndbrequire(removedPtr.p->in_hash);
4661     removedPtr.p->in_hash = false;
4662     removedPtr.p->reference_count = 0;
4663     removedPtr.p->removed_by_fail_api = removed_by_fail_api;
4664     m_commitAckMarkerPool.release(removedPtr);
4665     checkPoolShrinkNeed(DBLQH_COMMIT_ACK_MARKER_TRANSIENT_POOL_INDEX,
4666                         m_commitAckMarkerPool);
4667   }
4668   else
4669   {
4670     /**
4671      * This can happen in a special situation. This is when a large transaction
4672      * commits. As it decides to commit it sends of the commit decision to the
4673      * API. As soon as the API receives this it will send a TC_COMMIT_ACK
4674      * message back to DBTC. This message could arrive before the transaction
4675      * is fully completed. When the TC_COMMIT_ACK it is received it is
4676      * immediately transferred to the DBLQH's of the transaction owning the
4677      * commit ack markers. Next if the node where DBTC resides fails after
4678      * completing the sending of the TC_COMMIT_ACK, but before the transaction
4679      * is completed. This can happen in cases with a large transaction doing a
4680      * commit.
4681      *
4682      * Next step that happens is that a new DBTC takes over the transaction to
4683      * complete it. It will complete the parts remaining and since all parts
4684      * heard of were in the Committed state, the transaction will be committed
4685      * and a TCKEY_FAILCONF will be sent to the API. This TCKEY_FAILCONF will
4686      * trigger a new TC_COMMIT_ACK which will be passed through the new DBTC
4687      * and sent onwards to the participating DBLQH's. The REMOVE_MARKER_ORD
4688      * signal is received from DBTC here in DBLQH can thus in some cases be
4689      * received when multiple times, possibly even more than twice since there
4690      * could be multiple failures serially. The second and further times will
4691      * end up in this else-branch. There is no longer any record in DBLQH
4692      * with the received transaction id.
4693      *
4694      * This only happens as part of normal TC_COMMIT_ACK reception, so not when
4695      * the flag removed_by_fail_api is set.
4696      *
4697      * It can also happen in a situation where we are performing a TC takeover.
4698      * If an LQH instance reports having a marker, then the new TC inserts all
4699      * LQH instances as having the marker. So this means that a lot of LQH
4700      * instances will receive this message even when they haven't claimed to
4701      * have the marker. This is so since the protocol in LQH_TRANSCONF doesn't
4702      * specify which LQH instances that sent the LQH_TRANSCONF. Actually this
4703      * isn't entirely true since the senders block reference is always received
4704      * as part of Protocol6, thus we can actually find the LQH instance from
4705      * this. But for now we take this safe approach and send too many signals.
4706      * These signals will also end up in this branch.
4707      */
4708 #if (defined VM_TRACE || defined ERROR_INSERT) && defined(wl4391_todo)
4709     ndbout_c("%u Rem marker failed[%.8x %.8x] remove_by_fail_api = %u", instance(),
4710              key.transid1, key.transid2, removed_by_fail_api);
4711 #endif
4712   }
4713 #ifdef MARKER_TRACE
4714   ndbout_c("%u Rem marker[%.8x %.8x]", instance(), key.transid1, key.transid2);
4715 #endif
4716 }
4717 
4718 
4719 /* -------------------------------------------------------------------------- */
4720 /* -------                 ENTER SEND_PACKED                          ------- */
4721 /* Used to force a packed signal to be sent if local signal buffer is not     */
4722 /* empty.                                                                     */
4723 /* -------------------------------------------------------------------------- */
execSEND_PACKED(Signal * signal)4724 void Dblqh::execSEND_PACKED(Signal* signal)
4725 {
4726   HostRecordPtr Thostptr;
4727   UintR i;
4728   UintR j;
4729   UintR TpackedListIndex = cpackedListIndex;
4730   jamEntry();
4731   for (i = 0; i < TpackedListIndex; i++) {
4732     Thostptr.i = cpackedList[i];
4733     ptrAss(Thostptr, hostRecord);
4734     jam();
4735     ndbrequire(Thostptr.i - 1 < MAX_NDB_NODES - 1);
4736     for (j = 0; j < NDB_ARRAY_SIZE(Thostptr.p->lqh_pack); j++)
4737     {
4738       struct PackedWordsContainer * container = &Thostptr.p->lqh_pack[j];
4739       if (container->noOfPackedWords > 0) {
4740         jamDebug();
4741         sendPackedSignal(signal, container);
4742       }
4743     }
4744     for (j = 0; j < NDB_ARRAY_SIZE(Thostptr.p->tc_pack); j++)
4745     {
4746       struct PackedWordsContainer * container = &Thostptr.p->tc_pack[j];
4747       if (container->noOfPackedWords > 0) {
4748         jamDebug();
4749         sendPackedSignal(signal, container);
4750       }
4751     }
4752     Thostptr.p->inPackedList = false;
4753   }//for
4754   cpackedListIndex = 0;
4755   return;
4756 }//Dblqh::execSEND_PACKED()
4757 
4758 void
updatePackedList(Signal * signal,HostRecord * ahostptr,Uint16 hostId)4759 Dblqh::updatePackedList(Signal* signal, HostRecord * ahostptr, Uint16 hostId)
4760 {
4761   Uint32 TpackedListIndex = cpackedListIndex;
4762   if (ahostptr->inPackedList == false) {
4763     jamDebug();
4764     ahostptr->inPackedList = true;
4765     cpackedList[TpackedListIndex] = hostId;
4766     cpackedListIndex = TpackedListIndex + 1;
4767   }//if
4768 }//Dblqh::updatePackedList()
4769 
4770 void
execREAD_PSEUDO_REQ(Signal * signal)4771 Dblqh::execREAD_PSEUDO_REQ(Signal* signal)
4772 {
4773   jamEntryDebug();
4774   TcConnectionrecPtr regTcPtr;
4775   regTcPtr.i = signal->theData[0];
4776   ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
4777 
4778   switch(signal->theData[1])
4779   {
4780   case AttributeHeader::RANGE_NO:
4781     signal->theData[0] = regTcPtr.p->m_scan_curr_range_no;
4782     break;
4783   case AttributeHeader::RECORDS_IN_RANGE:
4784   case AttributeHeader::INDEX_STAT_KEY:
4785   case AttributeHeader::INDEX_STAT_VALUE:
4786   {
4787     jam();
4788     // scanptr gets reset somewhere within the timeslice
4789     ScanRecordPtr tmp;
4790     tmp.i = regTcPtr.p->tcScanRec;
4791     ndbrequire(c_scanRecordPool.getValidPtr(tmp));
4792     signal->theData[0] = tmp.p->scanAccPtr;
4793     c_tux->execREAD_PSEUDO_REQ(signal);
4794     break;
4795   }
4796   case AttributeHeader::LOCK_REF:
4797   {
4798     /* Return 3x 32-bit words
4799      *  - LQH instance info
4800      *  - TC operation index
4801      *  - Bottom 32-bits of LQH-local key-request id (for uniqueness)
4802      */
4803     jam();
4804     signal->theData[0] = (getOwnNodeId() << 16) | regTcPtr.p->fragmentid;
4805     signal->theData[1] = regTcPtr.p->tcOprec;
4806     signal->theData[2] = (Uint32) regTcPtr.p->lqhKeyReqId;
4807     break;
4808   }
4809   case AttributeHeader::OP_ID:
4810   {
4811     jam();
4812     memcpy(signal->theData, &regTcPtr.p->lqhKeyReqId, 8);
4813     break;
4814   }
4815   case AttributeHeader::CORR_FACTOR64:
4816   {
4817     Uint32 add = 0;
4818     ScanRecordPtr tmp;
4819     tmp.i = regTcPtr.p->tcScanRec;
4820     if (tmp.i != RNIL)
4821     {
4822       ndbrequire(c_scanRecordPool.getValidPtr(tmp));
4823       add = tmp.p->m_curr_batch_size_rows;
4824     }
4825 
4826     signal->theData[0] = regTcPtr.p->m_corrFactorLo + add;
4827     signal->theData[1] = regTcPtr.p->m_corrFactorHi;
4828     break;
4829   }
4830   default:
4831     ndbabort();
4832   }
4833 }
4834 
4835 /* ************>> */
4836 /*  TUPKEYCONF  > */
4837 /* ************>> */
execTUPKEYCONF(Signal * signal)4838 void Dblqh::execTUPKEYCONF(Signal* signal)
4839 {
4840   TcConnectionrecPtr regTcPtr = m_tc_connect_ptr;
4841   switch (regTcPtr.p->transactionState) {
4842   case TcConnectionrec::SCAN_TUPKEY:
4843   {
4844     jam();
4845     scanTupkeyConfLab(signal, regTcPtr.p);
4846     return;
4847   }
4848   case TcConnectionrec::WAIT_TUP:
4849   {
4850     FragrecordPtr regFragptr = fragptr;
4851     const TupKeyConf * const tupKeyConf = (TupKeyConf *)signal->getDataPtr();
4852     jamDebug();
4853     if (regTcPtr.p->seqNoReplica == 0) // Primary replica
4854       regTcPtr.p->numFiredTriggers = tupKeyConf->numFiredTriggers;
4855 
4856     Fragrecord::UsageStat& useStat = regFragptr.p->m_useStat;
4857     useStat.m_keyReqWordsReturned += tupKeyConf->readLength;
4858     useStat.m_keyInstructionCount += tupKeyConf->noExecInstructions;
4859 
4860     tupkeyConfLab(signal, regTcPtr);
4861     return;
4862   }
4863   case TcConnectionrec::COPY_TUPKEY:
4864   {
4865     jam();
4866     copyTupkeyConfLab(signal, regTcPtr);
4867     return;
4868   }
4869   case TcConnectionrec::WAIT_TUP_TO_ABORT:
4870   {
4871     Uint32 activeCreat = regTcPtr.p->activeCreat;
4872     jam();
4873 /* ------------------------------------------------------------------------- */
4874 // Abort was not ready to start until this signal came back. Now we are ready
4875 // to start the abort.
4876 /* ------------------------------------------------------------------------- */
4877     if (unlikely(activeCreat == Fragrecord::AC_NR_COPY))
4878     {
4879       jam();
4880       ndbrequire(regTcPtr.p->m_nr_delete.m_cnt);
4881       regTcPtr.p->m_nr_delete.m_cnt--;
4882       if (regTcPtr.p->m_nr_delete.m_cnt)
4883       {
4884 	jam();
4885 	/**
4886 	 * Let operation wait for pending NR operations
4887 	 *   even for before writing log...(as it's simpler)
4888 	 */
4889 
4890 #ifdef VM_TRACE
4891 	/**
4892 	 * Only disk table can have pending ops...
4893 	 */
4894 	TablerecPtr tablePtr;
4895 	tablePtr.i = regTcPtr.p->tableref;
4896 	ptrCheckGuard(tablePtr, ctabrecFileSize, tablerec);
4897 	ndbrequire(tablePtr.p->m_disk_table);
4898 #endif
4899 	return;
4900       }
4901     }
4902 
4903     abortCommonLab(signal, regTcPtr);
4904     return;
4905   }
4906   case TcConnectionrec::WAIT_ACC_ABORT:
4907   case TcConnectionrec::ABORT_QUEUED:
4908   {
4909     jam();
4910 /* ------------------------------------------------------------------------- */
4911 /*      IGNORE SINCE ABORT OF THIS OPERATION IS ONGOING ALREADY.             */
4912 /* ------------------------------------------------------------------------- */
4913     return;
4914   }
4915   default:
4916   {
4917     jamLine(regTcPtr.p->transactionState);
4918     ndbabort();
4919   }
4920   }//switch
4921 }//Dblqh::execTUPKEYCONF()
4922 
4923 /* ************> */
4924 /*  TUPKEYREF  > */
4925 /* ************> */
execTUPKEYREF(Signal * signal)4926 void Dblqh::execTUPKEYREF(Signal* signal)
4927 {
4928   const TupKeyRef * const tupKeyRef = (TupKeyRef *)signal->getDataPtr();
4929   jamEntryDebug();
4930   TcConnectionrecPtr tcConnectptr = m_tc_connect_ptr;
4931   terrorCode = tupKeyRef->errorCode;
4932   TRACE_OP(tcConnectptr.p, "TUPKEYREF");
4933 
4934 #ifdef VM_TRACE
4935   if (unlikely(tcConnectptr.p->activeCreat == Fragrecord::AC_NR_COPY))
4936   {
4937     ndbassert(tcConnectptr.p->transactionState == TcConnectionrec::WAIT_TUP ||
4938       tcConnectptr.p->transactionState ==TcConnectionrec::WAIT_TUP_TO_ABORT);
4939   }
4940 #endif
4941   switch (tcConnectptr.p->transactionState) {
4942   case TcConnectionrec::SCAN_TUPKEY:
4943   {
4944     jamDebug();
4945     scanTupkeyRefLab(signal, tcConnectptr);
4946     return;
4947   }
4948   case TcConnectionrec::WAIT_TUP:
4949   {
4950     const TupKeyRef * const tupKeyRef = (TupKeyRef *)signal->getDataPtr();
4951     jamDebug();
4952     if (unlikely(tcConnectptr.p->activeCreat == Fragrecord::AC_NR_COPY))
4953     {
4954       jam();
4955       ndbrequire(tcConnectptr.p->m_nr_delete.m_cnt);
4956       tcConnectptr.p->m_nr_delete.m_cnt--;
4957     }
4958     Fragrecord::UsageStat& useStat = fragptr.p->m_useStat;
4959     useStat.m_keyRefCount++;
4960     useStat.m_keyInstructionCount += tupKeyRef->noExecInstructions;
4961     abortErrorLab(signal, tcConnectptr);
4962     return;
4963   }
4964   case TcConnectionrec::COPY_TUPKEY:
4965   {
4966     copyTupkeyRefLab(signal, tcConnectptr);
4967     return;
4968   }
4969   case TcConnectionrec::WAIT_TUP_TO_ABORT:
4970   {
4971     jam();
4972     if (unlikely(tcConnectptr.p->activeCreat == Fragrecord::AC_NR_COPY))
4973     {
4974       jam();
4975       ndbrequire(tcConnectptr.p->m_nr_delete.m_cnt);
4976       tcConnectptr.p->m_nr_delete.m_cnt--;
4977     }
4978 /* ------------------------------------------------------------------------- */
4979 // Abort was not ready to start until this signal came back. Now we are ready
4980 // to start the abort.
4981 /* ------------------------------------------------------------------------- */
4982     abortCommonLab(signal, tcConnectptr);
4983     return;
4984   }
4985   case TcConnectionrec::WAIT_ACC_ABORT:
4986   case TcConnectionrec::ABORT_QUEUED:
4987   {
4988     jam();
4989 /* ------------------------------------------------------------------------- */
4990 /*       IGNORE SINCE ABORT OF THIS OPERATION IS ONGOING ALREADY.            */
4991 /* ------------------------------------------------------------------------- */
4992     return;
4993   }
4994   default:
4995     jamLine(tcConnectptr.p->transactionState);
4996     ndbabort();
4997   }//switch
4998 }//Dblqh::execTUPKEYREF()
4999 
sendPackedSignal(Signal * signal,struct PackedWordsContainer * container)5000 void Dblqh::sendPackedSignal(Signal* signal,
5001                              struct PackedWordsContainer * container)
5002 {
5003   Uint32 noOfWords = container->noOfPackedWords;
5004   BlockReference hostRef = container->hostBlockRef;
5005   container->noOfPackedWords = 0;
5006   MEMCOPY_NO_WORDS(&signal->theData[0],
5007                    &container->packedWords[0],
5008                    noOfWords);
5009   if (VERIFY_PACKED_SEND)
5010   {
5011     int receiveTypes = (refToMain(hostRef) == DBLQH)?
5012       LQH_RECEIVE_TYPES:
5013       TC_RECEIVE_TYPES;
5014     ndbrequire(PackedSignal::verify(&signal->theData[0],
5015                                     noOfWords,
5016                                     hostRef,
5017                                     receiveTypes,
5018                                     5)); /* Commit signal length */
5019   }
5020   sendSignal(hostRef, GSN_PACKED_SIGNAL, signal, noOfWords, JBB);
5021 }
5022 
sendCommitLqh(Signal * signal,BlockReference alqhBlockref,const TcConnectionrec * regTcPtr)5023 void Dblqh::sendCommitLqh(Signal* signal,
5024                           BlockReference alqhBlockref,
5025                           const TcConnectionrec* regTcPtr)
5026 {
5027   Uint32 instanceKey = refToInstance(alqhBlockref);
5028   ndbassert(refToMain(alqhBlockref) == DBLQH);
5029 
5030   if (instanceKey > MAX_NDBMT_LQH_THREADS)
5031   {
5032     /* No send packed support in these cases */
5033     jam();
5034     signal->theData[0] = regTcPtr->clientConnectrec;
5035     signal->theData[1] = regTcPtr->transid[0];
5036     signal->theData[2] = regTcPtr->transid[1];
5037     sendSignal(alqhBlockref, GSN_COMMIT, signal, 3, JBB);
5038     return;
5039   }
5040 
5041   HostRecordPtr Thostptr;
5042 
5043   Thostptr.i = refToNode(alqhBlockref);
5044   ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
5045   struct PackedWordsContainer * container = &Thostptr.p->lqh_pack[instanceKey];
5046 
5047   Uint32 Tdata[5];
5048   Tdata[0] = regTcPtr->clientConnectrec;
5049   Tdata[1] = regTcPtr->gci_hi;
5050   Tdata[2] = regTcPtr->transid[0];
5051   Tdata[3] = regTcPtr->transid[1];
5052   Tdata[4] = regTcPtr->gci_lo;
5053   Uint32 len = 5;
5054 
5055   if (container->noOfPackedWords > 25 - len) {
5056     jam();
5057     sendPackedSignal(signal, container);
5058   } else {
5059     jam();
5060     updatePackedList(signal, Thostptr.p, Thostptr.i);
5061   }
5062 
5063   Tdata[0] |= (ZCOMMIT << 28);
5064   Uint32 pos = container->noOfPackedWords;
5065   container->noOfPackedWords = pos + len;
5066   memcpy(&container->packedWords[pos], &Tdata[0], len << 2);
5067 }
5068 
sendCompleteLqh(Signal * signal,BlockReference alqhBlockref,const TcConnectionrec * regTcPtr)5069 void Dblqh::sendCompleteLqh(Signal* signal,
5070                             BlockReference alqhBlockref,
5071                             const TcConnectionrec* regTcPtr)
5072 {
5073   Uint32 instanceKey = refToInstance(alqhBlockref);
5074   ndbassert(refToMain(alqhBlockref) == DBLQH);
5075 
5076   if (instanceKey > MAX_NDBMT_LQH_THREADS)
5077   {
5078     /* No send packed support in these cases */
5079     jam();
5080     signal->theData[0] = regTcPtr->clientConnectrec;
5081     signal->theData[1] = regTcPtr->transid[0];
5082     signal->theData[2] = regTcPtr->transid[1];
5083     sendSignal(alqhBlockref, GSN_COMPLETE, signal, 3, JBB);
5084     return;
5085   }
5086 
5087   HostRecordPtr Thostptr;
5088 
5089   Thostptr.i = refToNode(alqhBlockref);
5090   ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
5091   struct PackedWordsContainer * container = &Thostptr.p->lqh_pack[instanceKey];
5092 
5093   Uint32 Tdata[3];
5094   Tdata[0] = regTcPtr->clientConnectrec;
5095   Tdata[1] = regTcPtr->transid[0];
5096   Tdata[2] = regTcPtr->transid[1];
5097   Uint32 len = 3;
5098 
5099   if (container->noOfPackedWords > 22) {
5100     jam();
5101     sendPackedSignal(signal, container);
5102   } else {
5103     jam();
5104     updatePackedList(signal, Thostptr.p, Thostptr.i);
5105   }
5106 
5107   Tdata[0] |= (ZCOMPLETE << 28);
5108   Uint32 pos = container->noOfPackedWords;
5109   container->noOfPackedWords = pos + len;
5110   memcpy(&container->packedWords[pos], &Tdata[0], len << 2);
5111 }
5112 
sendCommittedTc(Signal * signal,BlockReference atcBlockref,const TcConnectionrec * regTcPtr)5113 void Dblqh::sendCommittedTc(Signal* signal,
5114                             BlockReference atcBlockref,
5115                             const TcConnectionrec* regTcPtr)
5116 {
5117   Uint32 instanceKey = refToInstance(atcBlockref);
5118 
5119   ndbassert(refToMain(atcBlockref) == DBTC);
5120   if (instanceKey > MAX_NDBMT_TC_THREADS)
5121   {
5122     /* No send packed support in these cases */
5123     jam();
5124     signal->theData[0] = regTcPtr->clientConnectrec;
5125     signal->theData[1] = regTcPtr->transid[0];
5126     signal->theData[2] = regTcPtr->transid[1];
5127     sendSignal(atcBlockref, GSN_COMMITTED, signal, 3, JBB);
5128     return;
5129   }
5130 
5131   HostRecordPtr Thostptr;
5132   Thostptr.i = refToNode(atcBlockref);
5133   ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
5134   struct PackedWordsContainer * container = &Thostptr.p->tc_pack[instanceKey];
5135 
5136   Uint32 Tdata[3];
5137   Tdata[0] = regTcPtr->clientConnectrec;
5138   Tdata[1] = regTcPtr->transid[0];
5139   Tdata[2] = regTcPtr->transid[1];
5140   Uint32 len = 3;
5141 
5142   if (container->noOfPackedWords > 22) {
5143     jam();
5144     sendPackedSignal(signal, container);
5145   } else {
5146     jam();
5147     updatePackedList(signal, Thostptr.p, Thostptr.i);
5148   }
5149 
5150   Tdata[0] |= (ZCOMMITTED << 28);
5151   Uint32 pos = container->noOfPackedWords;
5152   container->noOfPackedWords = pos + len;
5153   memcpy(&container->packedWords[pos], &Tdata[0], len << 2);
5154 }
5155 
sendCompletedTc(Signal * signal,BlockReference atcBlockref,const TcConnectionrec * regTcPtr)5156 void Dblqh::sendCompletedTc(Signal* signal,
5157                             BlockReference atcBlockref,
5158                             const TcConnectionrec* regTcPtr)
5159 {
5160   Uint32 instanceKey = refToInstance(atcBlockref);
5161 
5162   ndbassert(refToMain(atcBlockref) == DBTC);
5163   if (instanceKey > MAX_NDBMT_TC_THREADS)
5164   {
5165     /* No handling of send packed in those cases */
5166     jam();
5167     signal->theData[0] = regTcPtr->clientConnectrec;
5168     signal->theData[1] = regTcPtr->transid[0];
5169     signal->theData[2] = regTcPtr->transid[1];
5170     sendSignal(atcBlockref, GSN_COMPLETED, signal, 3, JBB);
5171     return;
5172   }
5173 
5174   HostRecordPtr Thostptr;
5175   Thostptr.i = refToNode(atcBlockref);
5176   ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
5177   struct PackedWordsContainer * container = &Thostptr.p->tc_pack[instanceKey];
5178 
5179   Uint32 Tdata[3];
5180   Tdata[0] = regTcPtr->clientConnectrec;
5181   Tdata[1] = regTcPtr->transid[0];
5182   Tdata[2] = regTcPtr->transid[1];
5183   Uint32 len = 3;
5184 
5185   if (container->noOfPackedWords > 22) {
5186     jam();
5187     sendPackedSignal(signal, container);
5188   } else {
5189     jam();
5190     updatePackedList(signal, Thostptr.p, Thostptr.i);
5191   }
5192 
5193   Tdata[0] |= (ZCOMPLETED << 28);
5194   Uint32 pos = container->noOfPackedWords;
5195   container->noOfPackedWords = pos + len;
5196   memcpy(&container->packedWords[pos], &Tdata[0], len << 2);
5197 }
5198 
sendLqhkeyconfTc(Signal * signal,BlockReference atcBlockref,const TcConnectionrecPtr tcConnectptr)5199 void Dblqh::sendLqhkeyconfTc(Signal* signal,
5200                              BlockReference atcBlockref,
5201                              const TcConnectionrecPtr tcConnectptr)
5202 {
5203   LqhKeyConf* lqhKeyConf;
5204   struct PackedWordsContainer * container = nullptr;
5205   bool send_packed = true;
5206   HostRecordPtr Thostptr;
5207   Thostptr.i = refToNode(atcBlockref);
5208   Uint32 instanceKey = refToInstance(atcBlockref);
5209   ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
5210   Uint32 block = refToMain(atcBlockref);
5211 
5212   if (block == DBLQH)
5213   {
5214     if (instanceKey <= MAX_NDBMT_LQH_THREADS)
5215     {
5216       container = &Thostptr.p->lqh_pack[instanceKey];
5217     }
5218     else
5219     {
5220       send_packed = false;
5221     }
5222   }
5223   else if (block == DBTC)
5224   {
5225     if (instanceKey <= MAX_NDBMT_TC_THREADS)
5226     {
5227       container = &Thostptr.p->tc_pack[instanceKey];
5228     }
5229     else
5230     {
5231       send_packed = false;
5232     }
5233   }
5234   else
5235   {
5236     send_packed = false;
5237   }
5238 
5239 /*******************************************************************
5240 // Normal path
5241 // This signal was intended for DBTC as part of the normal transaction
5242 // execution.
5243 // More unusual path
5244 // This signal was intended for DBLQH as part of log execution or
5245 // node recovery.
5246 // Yet another path
5247 // Intended for DBSPJ as part of join processing
5248 ********************************************************************/
5249   if (send_packed)
5250   {
5251     if (container->noOfPackedWords > (25 - LqhKeyConf::SignalLength)) {
5252       jamDebug();
5253       sendPackedSignal(signal, container);
5254     } else {
5255       jamDebug();
5256       updatePackedList(signal, Thostptr.p, Thostptr.i);
5257     }//if
5258     lqhKeyConf = (LqhKeyConf *)
5259       &container->packedWords[container->noOfPackedWords];
5260     container->noOfPackedWords += LqhKeyConf::SignalLength;
5261   }
5262   else
5263   {
5264     lqhKeyConf = (LqhKeyConf *)&signal->theData[0];
5265   }
5266 
5267   Uint32 ptrAndType = tcConnectptr.i | (ZLQHKEYCONF << 28);
5268   Uint32 tcOprec = tcConnectptr.p->tcOprec;
5269   Uint32 ownRef = cownref;
5270   lqhKeyConf->connectPtr = ptrAndType;
5271   lqhKeyConf->opPtr = tcOprec;
5272   lqhKeyConf->userRef = ownRef;
5273 
5274   Uint32 readlenAi = tcConnectptr.p->readlenAi;
5275   Uint32 transid1 = tcConnectptr.p->transid[0];
5276   Uint32 transid2 = tcConnectptr.p->transid[1];
5277   Uint32 numFiredTriggers = tcConnectptr.p->numFiredTriggers;
5278   lqhKeyConf->readLen = readlenAi;
5279   lqhKeyConf->transId1 = transid1;
5280   lqhKeyConf->transId2 = transid2;
5281   lqhKeyConf->numFiredTriggers = numFiredTriggers;
5282 
5283   if (!send_packed)
5284   {
5285     lqhKeyConf->connectPtr = tcConnectptr.i;
5286     if (block == RESTORE)
5287     {
5288       ndbrequire(refToNode(atcBlockref) == cownNodeid &&
5289                  refToInstance(atcBlockref) == instance());
5290       EXECUTE_DIRECT(RESTORE, GSN_LQHKEYCONF,
5291                      signal, LqhKeyConf::SignalLength);
5292     }
5293     else
5294     {
5295       sendSignal(atcBlockref, GSN_LQHKEYCONF,
5296                  signal, LqhKeyConf::SignalLength, JBB);
5297     }
5298   }
5299 }//Dblqh::sendLqhkeyconfTc()
5300 
5301 /* ************************************************************************>>
5302  * KEYINFO: Get tuple request from DBTC. Next step is to contact DBACC to get
5303  * key to tuple if all key/attrinfo has been received, else for more attrinfo
5304  * signals.
5305  * ************************************************************************>> */
execKEYINFO(Signal * signal)5306 void Dblqh::execKEYINFO(Signal* signal)
5307 {
5308   Uint32 tcOprec = signal->theData[0];
5309   Uint32 transid1 = signal->theData[1];
5310   Uint32 transid2 = signal->theData[2];
5311   jamEntry();
5312   TcConnectionrecPtr tcConnectptr;
5313   if (findTransaction(transid1, transid2, tcOprec, 0, tcConnectptr) != ZOK)
5314   {
5315     jam();
5316     return;
5317   }//if
5318   setup_key_pointers(tcConnectptr.i);
5319   Uint32* const data = signal->theData + KeyInfo::HeaderLength;
5320   const Uint32 len = signal->getLength() - KeyInfo::HeaderLength;
5321 
5322   TcConnectionrec * const regTcPtr = tcConnectptr.p;
5323   TcConnectionrec::TransactionState state = regTcPtr->transactionState;
5324   if (state != TcConnectionrec::WAIT_TUPKEYINFO &&
5325       state != TcConnectionrec::WAIT_SCAN_AI)
5326   {
5327     jam();
5328 /*****************************************************************************/
5329 /* TRANSACTION WAS ABORTED, THIS IS MOST LIKELY A SIGNAL BELONGING TO THE    */
5330 /* ABORTED TRANSACTION. THUS IGNORE THE SIGNAL.                              */
5331 /*****************************************************************************/
5332     return;
5333   }//if
5334 
5335   Uint32 errorCode =
5336     handleLongTupKey(signal, data, len, regTcPtr);
5337 
5338   if (errorCode != 0) {
5339     if (errorCode == 1) {
5340       jam();
5341       return;
5342     }//if
5343     jam();
5344     terrorCode = errorCode;
5345     if(state == TcConnectionrec::WAIT_TUPKEYINFO)
5346       abortErrorLab(signal, tcConnectptr);
5347     else
5348       abort_scan(signal, regTcPtr->tcScanRec, errorCode, tcConnectptr);
5349     return;
5350   }//if
5351   if(state == TcConnectionrec::WAIT_TUPKEYINFO)
5352   {
5353     endgettupkeyLab(signal, tcConnectptr);
5354   }
5355   return;
5356 }//Dblqh::execKEYINFO()
5357 
5358 /* ------------------------------------------------------------------------- */
5359 /* FILL IN KEY DATA INTO DATA BUFFERS.                                       */
5360 /* ------------------------------------------------------------------------- */
handleLongTupKey(Signal * signal,Uint32 * dataPtr,Uint32 len,TcConnectionrec * regTcPtr)5361 Uint32 Dblqh::handleLongTupKey(Signal* signal,
5362 			       Uint32* dataPtr,
5363                                Uint32 len,
5364                                TcConnectionrec* regTcPtr)
5365 {
5366   Uint32 total = regTcPtr->save1 + len;
5367   Uint32 primKeyLen = regTcPtr->primKeyLen;
5368 
5369   if (unlikely(total > primKeyLen))
5370   {
5371     /**
5372      * DBLQH 6.3 has the bad taste to send more KEYINFO than what is
5373      *  really in the key...up to 3 words extra
5374      */
5375     Uint32 extra = total - primKeyLen;
5376     ndbrequire(extra <= 3);
5377     ndbrequire(len > extra);
5378     len -= extra;
5379   }
5380 
5381   bool ok= appendToSection(regTcPtr->keyInfoIVal,
5382                            dataPtr,
5383                            len);
5384   if (unlikely(!ok))
5385   {
5386     jam();
5387     return ZGET_DATAREC_ERROR;
5388   }
5389 
5390   regTcPtr->save1 = total;
5391   return (total >= primKeyLen ? 0 : 1);
5392 }//Dblqh::handleLongTupKey()
5393 
5394 /* ------------------------------------------------------------------------- */
5395 /* -------                HANDLE ATTRINFO SIGNALS                    ------- */
5396 /*                                                                           */
5397 /* ------------------------------------------------------------------------- */
5398 /* ************************************************************************>> */
5399 /*  ATTRINFO: Continuation of KEYINFO signal (except for scans that do not use*/
5400 /*  any KEYINFO). When all key and attribute info is received we contact DBACC*/
5401 /*  for index handling.                                                       */
5402 /* ************************************************************************>> */
execATTRINFO(Signal * signal)5403 void Dblqh::execATTRINFO(Signal* signal)
5404 {
5405   Uint32 tcOprec = signal->theData[0];
5406   Uint32 transid1 = signal->theData[1];
5407   Uint32 transid2 = signal->theData[2];
5408   jamEntry();
5409   TcConnectionrecPtr tcConnectptr;
5410   if (findTransaction(transid1,
5411                       transid2,
5412                       tcOprec, 0,
5413                       tcConnectptr) != ZOK)
5414   {
5415     jam();
5416     return;
5417   }//if
5418 
5419   setup_key_pointers(tcConnectptr.i);
5420   Uint32* const dataPtr = signal->getDataPtrSend() + AttrInfo::HeaderLength;
5421   const Uint32 length = signal->getLength() - AttrInfo::HeaderLength;
5422 
5423   TcConnectionrec * const regTcPtr = tcConnectptr.p;
5424   Uint32 totReclenAi = regTcPtr->totReclenAi;
5425   Uint32 currReclenAi = regTcPtr->currReclenAi + length;
5426   regTcPtr->currReclenAi = currReclenAi;
5427   if (totReclenAi == currReclenAi) {
5428     switch (regTcPtr->transactionState) {
5429     case TcConnectionrec::WAIT_ATTR:
5430     {
5431       jam();
5432       lqhAttrinfoLab(signal, dataPtr, length, tcConnectptr);
5433       endgettupkeyLab(signal, tcConnectptr);
5434       return;
5435       break;
5436     }
5437     case TcConnectionrec::WAIT_SCAN_AI:
5438       jam();
5439       scanAttrinfoLab(signal, dataPtr, length, tcConnectptr);
5440       return;
5441       break;
5442     case TcConnectionrec::WAIT_TUP_TO_ABORT:
5443     case TcConnectionrec::LOG_ABORT_QUEUED:
5444     case TcConnectionrec::ABORT_QUEUED:
5445     case TcConnectionrec::WAIT_ACC_ABORT:
5446     case TcConnectionrec::WAIT_AI_AFTER_ABORT:
5447       jam();
5448       aiStateErrorCheckLab(signal, dataPtr,length, tcConnectptr);
5449       return;
5450       break;
5451     default:
5452       jam();
5453       ndbrequire(regTcPtr->abortState != TcConnectionrec::ABORT_IDLE);
5454       break;
5455     }//switch
5456   } else if (currReclenAi < totReclenAi) {
5457     jam();
5458     switch (regTcPtr->transactionState) {
5459     case TcConnectionrec::WAIT_ATTR:
5460       jam();
5461       lqhAttrinfoLab(signal, dataPtr, length, tcConnectptr);
5462       return;
5463       break;
5464     case TcConnectionrec::WAIT_SCAN_AI:
5465       jam();
5466       scanAttrinfoLab(signal, dataPtr, length, tcConnectptr);
5467       return;
5468       break;
5469     case TcConnectionrec::WAIT_TUP_TO_ABORT:
5470     case TcConnectionrec::LOG_ABORT_QUEUED:
5471     case TcConnectionrec::ABORT_QUEUED:
5472     case TcConnectionrec::WAIT_ACC_ABORT:
5473     case TcConnectionrec::WAIT_AI_AFTER_ABORT:
5474       jam();
5475       aiStateErrorCheckLab(signal, dataPtr, length, tcConnectptr);
5476       return;
5477       break;
5478     default:
5479       jam();
5480       ndbrequire(regTcPtr->abortState != TcConnectionrec::ABORT_IDLE);
5481       break;
5482     }//switch
5483   } else {
5484     switch (regTcPtr->transactionState) {
5485     case TcConnectionrec::WAIT_SCAN_AI:
5486       jam();
5487       scanAttrinfoLab(signal, dataPtr, length, tcConnectptr);
5488       return;
5489       break;
5490     default:
5491       ndbout_c("%d", regTcPtr->transactionState);
5492       ndbabort();
5493     }//switch
5494   }//if
5495   return;
5496 }
5497 
5498 /* ************************************************************************>> */
5499 /*  TUP_ATTRINFO: Interpreted execution in DBTUP generates redo-log info      */
5500 /*  which is sent back to DBLQH for logging. This is because the decision     */
5501 /*  to execute or not is made in DBTUP and thus we cannot start logging until */
5502 /*  DBTUP part has been run.                                                  */
5503 /* ************************************************************************>> */
execTUP_ATTRINFO(Signal * signal)5504 void Dblqh::execTUP_ATTRINFO(Signal* signal)
5505 {
5506   jamEntryDebug();
5507   TcConnectionrecPtr tcConnectptr = m_tc_connect_ptr;
5508   TcConnectionrec * const regTcPtr = tcConnectptr.p;
5509 
5510   ndbrequire(regTcPtr->transactionState == TcConnectionrec::WAIT_TUP);
5511 
5512   /* TUP_ATTRINFO signal is unrelated to ATTRINFO
5513    * It just transports a section IVAL from TUP back to
5514    * LQH
5515    */
5516   ndbrequire(signal->header.theLength == 3);
5517   Uint32 tupAttrInfoWords= signal->theData[1];
5518   Uint32 tupAttrInfoIVal= signal->theData[2];
5519 
5520   ndbassert(tupAttrInfoWords > 0);
5521   ndbassert(tupAttrInfoIVal != RNIL);
5522 
5523   /* If we have stored ATTRINFO that we sent to TUP,
5524    * free it now
5525    */
5526   if (regTcPtr->attrInfoIVal != RNIL)
5527   {
5528     /* We should be expecting to receive attrInfo back */
5529     ndbassert( !(regTcPtr->m_flags &
5530                  TcConnectionrec::OP_SAVEATTRINFO) );
5531     releaseSection( regTcPtr->attrInfoIVal );
5532     regTcPtr->attrInfoIVal= RNIL;
5533   }
5534 
5535   /* Store reference to ATTRINFO from TUP */
5536   regTcPtr->attrInfoIVal= tupAttrInfoIVal;
5537   regTcPtr->currTupAiLen= tupAttrInfoWords;
5538 }//Dblqh::execTUP_ATTRINFO()
5539 
5540 /* ------------------------------------------------------------------------- */
5541 /* -------                HANDLE ATTRINFO FROM LQH                   ------- */
5542 /*                                                                           */
5543 /* ------------------------------------------------------------------------- */
lqhAttrinfoLab(Signal * signal,Uint32 * dataPtr,Uint32 length,const TcConnectionrecPtr tcConnectptr)5544 void Dblqh::lqhAttrinfoLab(Signal* signal,
5545                            Uint32* dataPtr,
5546                            Uint32 length,
5547                            const TcConnectionrecPtr tcConnectptr)
5548 {
5549   /* Store received AttrInfo in a long section */
5550   jam();
5551   if (saveAttrInfoInSection(dataPtr, length, tcConnectptr.p) == ZOK) {
5552     ;
5553   } else {
5554     jam();
5555 /* ------------------------------------------------------------------------- */
5556 /* WE MIGHT BE WAITING FOR RESPONSE FROM SOME BLOCK HERE. THUS WE NEED TO    */
5557 /* GO THROUGH THE STATE MACHINE FOR THE OPERATION.                           */
5558 /* ------------------------------------------------------------------------- */
5559     localAbortStateHandlerLab(signal, tcConnectptr);
5560     return;
5561   }//if
5562 }//Dblqh::lqhAttrinfoLab()
5563 
5564 /* ------------------------------------------------------------------------- */
5565 /* ------         FIND TRANSACTION BY USING HASH TABLE               ------- */
5566 /*                                                                           */
5567 /* We keep a hash structure of TcConnectionrec which are identified by:      */
5568 /*  - Id of Transaction owning this TcConnectionrec.                         */
5569 /*  - A 'tcOpRec' id which uniquely(*below) identify this TcConnectionRec    */
5570 /*    within this specific transaction.                                      */
5571 /*  - An optional 'hashHi' id used for SCANREQs in cases where 'tcOpRec'     */
5572 /*    on its own cant provide uniqueness.                                    */
5573 /*    This is required in cases where there are multiple (internal) clients  */
5574 /*    producing REQs where the uniqueness is only guaranteed within          */
5575 /*    each client. Currently the only such client is the SPJ block.          */
5576 /*                                                                           */
5577 /* Hash lookup of TcConnectionrecPtr might be required for TcConnectionRecs  */
5578 /* having a lifetime beyond the initial REQ. That is:                        */
5579 /*  - Short requests awaiting for a later ATTR- or KEYINFO.                  */
5580 /*  - SCANREQ which may need a later NEXTREQ to fetch more or close scan     */
5581 /*  - Transactional (non-DirtyOp) REQs which need a later abort, commit      */
5582 /*    or unlock request.                                                     */
5583 /*                                                                           */
5584 /* TcConnectionrec's identified as not requiring hash lookup are not         */
5585 /* inserted in the hash table!                                               */
5586 /*                                                                           */
5587 /* 'tcOpRec' ids comes from TC. Where TC has released the record (dirtyOp),  */
5588 /* the id can be reused. Therefore it cannot be considered 'unique' beyond   */
5589 /* the reception of the request signal (train). For non dirty operations it  */
5590 /* is unique for the lifecycle of the operation at TC.                       */
5591 /*                                                                           */
5592 /* NOTE:                                                                     */
5593 /*   The internal clients of NDB does *not* guarantee hash uniqueness        */
5594 /*   for LQHKEYREQs as described above (SPJ, node restart ..). However,      */
5595 /*   these requests are all 'long', 'dirtyOp'-requests and thus neither      */
5596 /*   inserted nor searched after in the hash table.                          */
5597 /*                                                                           */
5598 /* ------------------------------------------------------------------------- */
findTransaction(UintR Transid1,UintR Transid2,UintR TcOprec,Uint32 hi,TcConnectionrecPtr & tcConnectptr)5599 int Dblqh::findTransaction(UintR Transid1, UintR Transid2, UintR TcOprec,
5600                            Uint32 hi,
5601                            TcConnectionrecPtr& tcConnectptr)
5602 {
5603   TcConnectionrecPtr locTcConnectptr;
5604 
5605   Uint32 ThashIndex = (Transid1 ^ TcOprec) & (TRANSID_HASH_SIZE - 1);
5606   locTcConnectptr.i = ctransidHash[ThashIndex];
5607   while (locTcConnectptr.i != RNIL) {
5608     ndbrequire(tcConnect_pool.getUncheckedPtrRW(locTcConnectptr));
5609     if ((locTcConnectptr.p->transid[0] == Transid1) &&
5610         (locTcConnectptr.p->transid[1] == Transid2) &&
5611         (locTcConnectptr.p->tcOprec == TcOprec) &&
5612         (locTcConnectptr.p->tcHashKeyHi == hi)) {
5613 /* FIRST PART OF TRANSACTION CORRECT */
5614 /* SECOND PART ALSO CORRECT */
5615 /* THE OPERATION RECORD POINTER IN TC WAS ALSO CORRECT */
5616       jam();
5617       tcConnectptr = locTcConnectptr;
5618       ndbrequire(Magic::check_ptr(locTcConnectptr.p));
5619       ndbassert(tcConnectptr.p->hashIndex == ThashIndex);
5620       return (int)ZOK;
5621     }//if
5622     jam();
5623 /* THIS WAS NOT THE TRANSACTION WHICH WAS SOUGHT */
5624     locTcConnectptr.i = locTcConnectptr.p->nextHashRec;
5625     ndbrequire(Magic::check_ptr(locTcConnectptr.p));
5626   }//while
5627 /* WE DID NOT FIND THE TRANSACTION, REPORT NOT FOUND */
5628   return (int)ZNOT_FOUND;
5629 }//Dblqh::findTransaction()
5630 
5631 /* ------------------------------------------------------------------------- */
5632 /* -------           SAVE ATTRINFO INTO ATTR SECTION                 ------- */
5633 /*                                                                           */
5634 /* ------------------------------------------------------------------------- */
saveAttrInfoInSection(const Uint32 * dataPtr,Uint32 len,TcConnectionrec * regTcPtr)5635 int Dblqh::saveAttrInfoInSection(const Uint32* dataPtr,
5636                                  Uint32 len,
5637                                  TcConnectionrec* regTcPtr)
5638 {
5639   bool ok= appendToSection(regTcPtr->attrInfoIVal,
5640                            dataPtr,
5641                            len);
5642 
5643   if (unlikely(!ok))
5644   {
5645     jam();
5646     terrorCode = ZGET_ATTRINBUF_ERROR;
5647     return ZGET_ATTRINBUF_ERROR;
5648   }//if
5649 
5650   if (regTcPtr->m_flags & TcConnectionrec::OP_SAVEATTRINFO)
5651     regTcPtr->currTupAiLen += len;
5652 
5653   return ZOK;
5654 } // saveAttrInfoInSection
5655 
5656 inline
5657 static void
prefetch_op_record_3(Uint32 * op_ptr)5658 prefetch_op_record_3(Uint32 *op_ptr)
5659 {
5660   NDB_PREFETCH_WRITE(op_ptr);
5661   NDB_PREFETCH_WRITE(op_ptr + 16);
5662   NDB_PREFETCH_WRITE(op_ptr + 32);
5663 }
5664 
5665 inline
5666 static void
prefetch_op_record_4(Uint32 * op_ptr)5667 prefetch_op_record_4(Uint32 *op_ptr)
5668 {
5669   //First cacheline prefetched in TransientPool
5670   NDB_PREFETCH_WRITE(op_ptr + 16);
5671   NDB_PREFETCH_WRITE(op_ptr + 32);
5672   NDB_PREFETCH_WRITE(op_ptr + 48);
5673   NDB_PREFETCH_WRITE(op_ptr + 64);
5674 }
5675 
5676 bool
seize_op_rec(TcConnectionrecPtr & tcConnectptr)5677 Dblqh::seize_op_rec(TcConnectionrecPtr& tcConnectptr)
5678 {
5679   TcConnectionrecPtr opPtr;
5680   if (unlikely(!tcConnect_pool.seize(opPtr)))
5681   {
5682     jam();
5683     return false;
5684   }
5685   prefetch_op_record_4((Uint32*)opPtr.p);
5686   ndbrequire(opPtr.i >= ctcConnectReserved);
5687   if (unlikely(!c_acc->seize_op_rec(opPtr.i,
5688                                     reference(),
5689                                     opPtr.p->accConnectrec,
5690                                     &opPtr.p->accConnectPtrP)))
5691   {
5692     goto acc_fail;
5693   }
5694   if (unlikely(!c_tup->seize_op_rec(opPtr.i,
5695                                     reference(),
5696                                     opPtr.p->tupConnectrec,
5697                                     &opPtr.p->tupConnectPtrP)))
5698   {
5699     goto tup_fail;
5700   }
5701   opPtr.p->tcTimer = cLqhTimeOutCount;
5702   c_tup->prepare_op_pointer(opPtr.p->tupConnectrec,
5703                             opPtr.p->tupConnectPtrP);
5704   tcConnectptr = opPtr;
5705   m_tc_connect_ptr = opPtr;
5706   ndbrequire(Magic::check_ptr(opPtr.p->accConnectPtrP));
5707   ndbrequire(Magic::check_ptr(opPtr.p->tupConnectPtrP));
5708   return true;
5709 
5710 tup_fail:
5711   jam();
5712   ndbrequire(Magic::check_ptr(opPtr.p));
5713   c_acc->release_op_rec(opPtr.p->accConnectrec,
5714                         opPtr.p->accConnectPtrP);
5715 
5716 acc_fail:
5717   jam();
5718   ndbrequire(Magic::check_ptr(opPtr.p));
5719   tcConnect_pool.release(opPtr);
5720   checkPoolShrinkNeed(DBLQH_OPERATION_RECORD_TRANSIENT_POOL_INDEX,
5721                       tcConnect_pool);
5722   return false;
5723 }
5724 
5725 void
release_op_rec(TcConnectionrecPtr opPtr)5726 Dblqh::release_op_rec(TcConnectionrecPtr opPtr)
5727 {
5728   c_tup->release_op_rec(opPtr.p->tupConnectrec,
5729                         opPtr.p->tupConnectPtrP);
5730   c_acc->release_op_rec(opPtr.p->accConnectrec,
5731                         opPtr.p->accConnectPtrP);
5732   tcConnect_pool.release(opPtr);
5733   checkPoolShrinkNeed(DBLQH_OPERATION_RECORD_TRANSIENT_POOL_INDEX,
5734                       tcConnect_pool);
5735 }
5736 
5737 /* ==========================================================================
5738  * =======                        SEIZE TC CONNECT RECORD             =======
5739  *
5740  *       GETS A NEW TC CONNECT RECORD FROM FREELIST.
5741  * ========================================================================= */
seizeTcrec(TcConnectionrecPtr & tcConnectptr)5742 void Dblqh::seizeTcrec(TcConnectionrecPtr& tcConnectptr)
5743 {
5744   TcConnectionrecPtr locTcConnectptr;
5745 
5746   locTcConnectptr.i = cfirstfreeTcConrec;
5747 
5748   Uint32 numFree = ctcNumFree;
5749   Uint32 timeOutCount = cLqhTimeOutCount;
5750 
5751   ndbrequire(tcConnect_pool.getUncheckedPtrRW(locTcConnectptr));
5752 
5753   /**
5754    * We are going to write to most of the operation object which is
5755    * likely to cover at least 240 bytes and thus at least 4
5756    * cache lines ahead. We will prefetch those immediately now
5757    * to ensure that memory accesses are started as soon as possible.
5758    */
5759   prefetch_op_record_4((Uint32*)locTcConnectptr.p);
5760 
5761   Uint32 nextTc = locTcConnectptr.p->nextTcConnectrec;
5762 
5763   locTcConnectptr.p->nextTcConnectrec = RNIL;
5764   locTcConnectptr.p->clientConnectrec = RNIL;
5765   locTcConnectptr.p->clientBlockref = RNIL;
5766   locTcConnectptr.p->tableref = RNIL;
5767   locTcConnectptr.p->hashIndex = RNIL;
5768   locTcConnectptr.p->nextSeqNoReplica = 0;
5769   locTcConnectptr.p->seqNoReplica = 0;
5770   locTcConnectptr.p->m_committed_log_space = 0;
5771   locTcConnectptr.p->m_dealloc_state = TcConnectionrec::DA_IDLE;
5772   locTcConnectptr.p->m_dealloc_data.m_unused = RNIL;
5773   ndbrequire(Magic::check_ptr(locTcConnectptr.p));
5774 
5775   ctcNumFree = numFree - 1;
5776   cfirstfreeTcConrec = nextTc;
5777 
5778   locTcConnectptr.p->tcTimer = timeOutCount;
5779   locTcConnectptr.p->abortState = TcConnectionrec::ABORT_IDLE;
5780   locTcConnectptr.p->connectState = TcConnectionrec::CONNECTED;
5781   locTcConnectptr.p->savePointId = 0;
5782   locTcConnectptr.p->gci_hi = 0;
5783   locTcConnectptr.p->gci_lo = 0;
5784   locTcConnectptr.p->errorCode = 0;
5785   c_tup->prepare_op_pointer(locTcConnectptr.p->tupConnectrec,
5786                             locTcConnectptr.p->tupConnectPtrP);
5787 
5788   tcConnectptr = locTcConnectptr;
5789   m_tc_connect_ptr = locTcConnectptr;
5790   ndbrequire(Magic::check_ptr(locTcConnectptr.p->tupConnectPtrP));
5791 }//Dblqh::seizeTcrec()
5792 
5793 bool
checkTransporterOverloaded(Signal * signal,const NodeBitmask & all,const LqhKeyReq * req)5794 Dblqh::checkTransporterOverloaded(Signal* signal,
5795                                   const NodeBitmask& all,
5796                                   const LqhKeyReq* req)
5797 {
5798   /* FC : Quick exit if the mask is clear? */
5799   // nodes likely to be affected by this op
5800   NodeBitmask mask;
5801   // tc
5802   Uint32 tc_node = refToNode(req->tcBlockref);
5803   if (tc_node < MAX_NODES) // not worth to crash here
5804     mask.set(tc_node);
5805   const Uint8 op = LqhKeyReq::getOperation(req->requestInfo);
5806   if (op == ZREAD || op == ZREAD_EX || op == ZUNLOCK) {
5807     // the receiver
5808     Uint32 api_node = refToNode(req->variableData[0]);
5809     if (api_node < MAX_NODES) // not worth to crash here
5810       mask.set(api_node);
5811   } else {
5812     // next replica
5813     Uint32 replica_node = LqhKeyReq::getNextReplicaNodeId(req->fragmentData);
5814     if (replica_node < MAX_NODES) // could be ZNIL
5815       mask.set(replica_node);
5816     // event subscribers
5817     const Suma* suma = (Suma*)globalData.getBlock(SUMA);
5818     mask.bitOR(suma->getSubscriberNodes());
5819   }
5820   mask.bitAND(all);
5821   if (likely(mask.isclear()))
5822   {
5823     return false;
5824   }
5825 
5826   jam();
5827   /* Overloaded, do some accounting */
5828   c_keyOverloads++;
5829 
5830   if (tc_node < MAX_NODES && all.get(tc_node))
5831   {
5832     jam();
5833     c_keyOverloadsTcNode++;
5834   }
5835 
5836   if (op == ZREAD || op == ZREAD_EX || op == ZUNLOCK)
5837   {
5838     jam();
5839     // the receiver
5840     Uint32 api_node = refToNode(req->variableData[0]);
5841     if ((api_node < MAX_NODES) && // not worth to crash here
5842         (all.get(api_node)))
5843     {
5844       jam();
5845       c_keyOverloadsReaderApi++;
5846     }
5847   }
5848   else
5849   {
5850     jam();
5851     // write
5852     // next replica
5853     Uint32 replica_node = LqhKeyReq::getNextReplicaNodeId(req->fragmentData);
5854     if ((replica_node < MAX_NODES) &&
5855         (all.get(replica_node)))
5856     {
5857       jam();
5858       c_keyOverloadsPeerNode++;
5859     }
5860 
5861     // event subscribers
5862     const Suma* suma = (Suma*)globalData.getBlock(SUMA);
5863     NodeBitmask subscribers = suma->getSubscriberNodes();
5864     subscribers.bitAND(all);
5865     if (!subscribers.isclear())
5866     {
5867       jam();
5868       c_keyOverloadsSubscriber++;
5869     }
5870   }
5871 
5872   return true;
5873 }
5874 
execSIGNAL_DROPPED_REP(Signal * signal)5875 void Dblqh::execSIGNAL_DROPPED_REP(Signal* signal)
5876 {
5877   /* An incoming signal was dropped, handle it
5878    * Dropped signal really means that we ran out of
5879    * long signal buffering to store its sections
5880    */
5881   jamEntry();
5882 
5883   if (!assembleDroppedFragments(signal))
5884   {
5885     jam();
5886     return;
5887   }
5888 
5889   const SignalDroppedRep* rep = (SignalDroppedRep*) &signal->theData[0];
5890   Uint32 originalGSN= rep->originalGsn;
5891   const bool isLongReq = (rep->originalSectionCount > 0);
5892 
5893   LQH_DEBUG("SignalDroppedRep received for GSN " << originalGSN);
5894 
5895   switch(originalGSN) {
5896   case GSN_LQHKEYREQ:
5897   {
5898     jam();
5899     /* Get original signal data - unfortunately it may
5900      * have been truncated.  We must not read beyond
5901      * word # 22
5902      * We will notify the client that their LQHKEYREQ
5903      * failed
5904      */
5905     TcConnectionrecPtr tcConnectptr;
5906     tcConnectptr.i = RNIL;
5907     const LqhKeyReq * const truncatedLqhKeyReq =
5908       (LqhKeyReq *) &rep->originalData[0];
5909 
5910     earlyKeyReqAbort(signal,
5911                      truncatedLqhKeyReq,
5912                      isLongReq,
5913                      ZGET_DATAREC_ERROR,
5914                      tcConnectptr);
5915 
5916     break;
5917   }
5918   case GSN_SCAN_FRAGREQ:
5919   {
5920     jam();
5921     /* Get original signal data - unfortunately it may
5922      * have been truncated.  We must not read beyond
5923      * word # 22
5924      * We will notify the client that their SCAN_FRAGREQ
5925      * failed
5926      */
5927     // TODO : Handle fragmented failure
5928     const ScanFragReq* const truncatedScanFragReq =
5929       (ScanFragReq*) &rep->originalData[0];
5930     const Uint32 senderData= truncatedScanFragReq->senderData;
5931     const Uint32 transid1= truncatedScanFragReq->transId1;
5932     const Uint32 transid2= truncatedScanFragReq->transId2;
5933 
5934     /* Send SCAN_FRAGREF back to the client */
5935     ScanFragRef* ref= (ScanFragRef*)&signal->theData[0];
5936     ref->senderData= senderData;
5937     ref->transId1= transid1;
5938     ref->transId2= transid2;
5939     ref->errorCode= ZGET_ATTRINBUF_ERROR;
5940 
5941     sendSignal(signal->senderBlockRef(), GSN_SCAN_FRAGREF, signal,
5942                ScanFragRef::SignalLength, JBB);
5943     break;
5944   }
5945   default:
5946     jam();
5947     /* Don't expect dropped signals for other GSNs,
5948      * default handling
5949      */
5950     SimulatedBlock::execSIGNAL_DROPPED_REP(signal);
5951   };
5952 
5953   return;
5954 }
5955 
5956 // Get size of interpreted program, in words.
getProgramWordCount(SegmentedSectionPtr attrInfo)5957 static inline Uint32 getProgramWordCount(SegmentedSectionPtr attrInfo)
5958 {
5959   /*
5960     The second word of 'attrinfo' contains the length of the interpreted
5961     program, and the fifth contains the length of associated subroutines.
5962     (There should be a header of 5 length fields at the start of
5963     'attrinfo'.)
5964   */
5965   assert(attrInfo.sz>=5);
5966   SectionReader aiReader(attrInfo, g_sectionSegmentPool);
5967   Uint32 header[5];
5968   const bool ok =
5969     aiReader.getWords(header, sizeof(header)/sizeof(header[0]));
5970   assert(ok);
5971   (void) ok; // Prevent compiler warning.
5972   return header[1]+header[4];
5973 }
5974 
5975 /* ------------------------------------------------------------------------- */
5976 /* -------                TAKE CARE OF LQHKEYREQ                     ------- */
5977 /* LQHKEYREQ IS THE SIGNAL THAT STARTS ALL OPERATIONS IN THE LQH BLOCK       */
5978 /* THIS SIGNAL CONTAINS A LOT OF INFORMATION ABOUT WHAT TYPE OF OPERATION,   */
5979 /* KEY INFORMATION, ATTRIBUTE INFORMATION, NODE INFORMATION AND A LOT MORE   */
5980 /* ------------------------------------------------------------------------- */
execLQHKEYREQ(Signal * signal)5981 void Dblqh::execLQHKEYREQ(Signal* signal)
5982 {
5983   if (unlikely(!assembleFragments(signal)))
5984   {
5985     jam();
5986     return;
5987   }
5988   UintR sig0, sig1, sig2, sig3, sig4, sig5;
5989   Uint8 tfragDistKey;
5990 
5991   const LqhKeyReq * const lqhKeyReq = (LqhKeyReq *)signal->getDataPtr();
5992   SectionHandle handle(this, signal);
5993   const bool isLongReq = (handle.m_cnt > 0);
5994   TcConnectionrecPtr tcConnectptr;
5995   tcConnectptr.i = RNIL;
5996 
5997   {
5998     const NodeBitmask& all = globalTransporterRegistry.get_status_overloaded();
5999     if (unlikely(!all.isclear()))
6000     {
6001       if (checkTransporterOverloaded(signal, all, lqhKeyReq))
6002       {
6003         /* Overloaded, reject new work */
6004         jam();
6005         releaseSections(handle);
6006         earlyKeyReqAbort(signal, lqhKeyReq, isLongReq,
6007                          ZTRANSPORTER_OVERLOADED_ERROR,
6008                          tcConnectptr);
6009         return;
6010       }
6011     }
6012   }
6013 
6014   const UintR Treqinfo = lqhKeyReq->requestInfo;
6015 
6016   if (ERROR_INSERTED(5078) &&
6017       refToMain(signal->header.theSendersBlockRef) == DBSPJ &&
6018       LqhKeyReq::getDirtyFlag(Treqinfo) &&
6019       !LqhKeyReq::getNormalProtocolFlag(Treqinfo))
6020   {
6021     /**
6022      * This is used to trigger Bug#16187976 "NDBD NODE FAILS TO START WITH
6023      * ILLEGAL SIGNAL RECEIVED (GSN 121 NOT ADDED)". This bug occurs if a
6024      * ROUTE_ORD signal carrying a TCKEYREC signal is sent via the SPJ block.
6025      * ROUTE_ORD signals should always be sent via TC, which unlike SPJ should
6026      * be connected to the API. (Otherwise, the API will initiate its own
6027      * error handling which will compensate for TCKEYREC and other missing
6028      * signals.) The tests above check that we use the short-circuited protocol,
6029      * meaning that LQH wants to send TCKEYREC directly to the API, instead
6030      * of sending LQHKEYREC to SPJ (or TC).
6031      * Here we enable a different error insert (5079) which we test for in
6032      * Dblqh::sendTCKEYREF() below. It is done this way since in
6033      * sendTCKEYREF() we would otherwise not have sufficient context to tell
6034      * when to send the ROUTE_ORD signal.
6035      */
6036     SET_ERROR_INSERT_VALUE(5079);
6037   }
6038 
6039   if (ERROR_INSERTED_CLEAR(5047) ||
6040       ERROR_INSERTED(5079) ||
6041      (ERROR_INSERTED(5102) &&
6042       LqhKeyReq::getNoTriggersFlag(Treqinfo)) ||
6043      (ERROR_INSERTED(5103) &&
6044       LqhKeyReq::getOperation(Treqinfo) == ZDELETE) ||
6045      (ERROR_INSERTED(5104) &&
6046       LqhKeyReq::getOperation(Treqinfo) == ZINSERT) ||
6047      (ERROR_INSERTED(5105) &&
6048       LqhKeyReq::getOperation(Treqinfo) == ZUPDATE) ||
6049       ERROR_INSERTED(5098))
6050   {
6051     jam();
6052     releaseSections(handle);
6053     earlyKeyReqAbort(signal, lqhKeyReq, isLongReq,
6054                      ZTRANSPORTER_OVERLOADED_ERROR,
6055                      tcConnectptr);
6056     return;
6057   }
6058 
6059   sig0 = lqhKeyReq->clientConnectPtr;
6060   if (likely((ctcNumFree > ZNUM_RESERVED_UTIL_CONNECT_RECORDS &&
6061               !ERROR_INSERTED(5031)) ||
6062              (ctcNumFree > ZNUM_RESERVED_TC_CONNECT_RECORDS &&
6063               LqhKeyReq::getUtilFlag(Treqinfo))))
6064   {
6065     jamEntry();
6066     seizeTcrec(tcConnectptr);
6067   }
6068   else
6069   {
6070     jamEntry();
6071     if (unlikely(ERROR_INSERTED_CLEAR(5031) ||
6072                  (!seize_op_rec(tcConnectptr))))
6073     {
6074       jam();
6075 /* ------------------------------------------------------------------------- */
6076 /* NO FREE TC RECORD AVAILABLE, THUS WE CANNOT HANDLE THE REQUEST.           */
6077 /* ------------------------------------------------------------------------- */
6078       releaseSections(handle);
6079       earlyKeyReqAbort(signal,
6080                        lqhKeyReq,
6081                        isLongReq,
6082                        ZNO_TC_CONNECT_ERROR,
6083                        tcConnectptr);
6084       return;
6085     }
6086   }//if
6087 
6088   if(ERROR_INSERTED(5038) &&
6089      refToNode(signal->getSendersBlockRef()) != getOwnNodeId()){
6090     jam();
6091     releaseSections(handle);
6092     SET_ERROR_INSERT_VALUE(5039);
6093     return;
6094   }
6095 
6096   Uint32 tot_lqh_key_req_count = cTotalLqhKeyReqCount;
6097   Uint32 num_operations = c_Counters.operations;
6098 
6099   TcConnectionrec * const regTcPtr = tcConnectptr.p;
6100   jamLine(tcConnectptr.i & 0xFFFF);
6101   c_Counters.operations = num_operations + 1;
6102   cTotalLqhKeyReqCount = tot_lqh_key_req_count + 1;
6103 
6104   Uint32 senderRef = regTcPtr->clientBlockref = signal->senderBlockRef();
6105   regTcPtr->clientConnectrec = sig0;
6106   regTcPtr->tcOprec = sig0;
6107   regTcPtr->tcHashKeyHi = 0;
6108   regTcPtr->lqhKeyReqId = cTotalLqhKeyReqCount;
6109   regTcPtr->commitAckMarker = RNIL;
6110   regTcPtr->m_flags= 0;
6111   if (isLongReq)
6112   {
6113     regTcPtr->m_flags|= TcConnectionrec::OP_ISLONGREQ;
6114   }
6115 
6116   UintR attrLenFlags = lqhKeyReq->attrLen;
6117   sig1 = lqhKeyReq->savePointId;
6118   sig2 = lqhKeyReq->hashValue;
6119   sig4 = lqhKeyReq->tableSchemaVersion;
6120   sig5 = lqhKeyReq->tcBlockref;
6121   // Ensure that ROUTE_ORD (carrying TCKEYREF) will not be sent to SPJ.
6122   ndbassert(refToNode(signal->getSendersBlockRef()) == getOwnNodeId() ||
6123             !LqhKeyReq::getDirtyFlag(lqhKeyReq->requestInfo) ||
6124             LqhKeyReq::getNormalProtocolFlag(lqhKeyReq->requestInfo) ||
6125             LqhKeyReq::getOperation(lqhKeyReq->requestInfo) != ZREAD ||
6126             refToMain(lqhKeyReq->tcBlockref) == DBTC);
6127 
6128   regTcPtr->savePointId = sig1;
6129   regTcPtr->hashValue = sig2;
6130   const Uint32 schemaVersion = regTcPtr->schemaVersion = LqhKeyReq::getSchemaVersion(sig4);
6131   tabptr.i = LqhKeyReq::getTableId(sig4);
6132   regTcPtr->tcBlockref = sig5;
6133 
6134   const Uint8 op = LqhKeyReq::getOperation(Treqinfo);
6135   if (ERROR_INSERTED(5080) ||
6136       ((op == ZREAD || op == ZREAD_EX) && !getAllowRead()))
6137   {
6138     releaseSections(handle);
6139     earlyKeyReqAbort(signal,
6140                      lqhKeyReq,
6141                      isLongReq,
6142                      ZNODE_SHUTDOWN_IN_PROGRESS,
6143                      tcConnectptr);
6144     return;
6145   }
6146 
6147   if (ERROR_INSERTED(5081) ||
6148       unlikely(get_node_status(refToNode(sig5)) != ZNODE_UP))
6149   {
6150     releaseSections(handle);
6151     earlyKeyReqAbort(signal,
6152                      lqhKeyReq,
6153                      isLongReq,
6154                      ZNODE_SHUTDOWN_IN_PROGRESS,
6155                      tcConnectptr);
6156     return;
6157   }
6158 
6159   Uint32 senderVersion = getNodeInfo(refToNode(senderRef)).m_version;
6160 
6161   regTcPtr->tcScanInfo  = lqhKeyReq->scanInfo;
6162   regTcPtr->indTakeOver = LqhKeyReq::getScanTakeOverFlag(attrLenFlags);
6163   regTcPtr->m_reorg     = LqhKeyReq::getReorgFlag(attrLenFlags);
6164 
6165   regTcPtr->readlenAi = 0;
6166   regTcPtr->currTupAiLen = 0;
6167   regTcPtr->logWriteState = TcConnectionrec::NOT_STARTED;
6168   regTcPtr->fragmentptr = RNIL;
6169 
6170   sig0 = lqhKeyReq->fragmentData;
6171   sig1 = lqhKeyReq->transId1;
6172   sig2 = lqhKeyReq->transId2;
6173   sig3 = lqhKeyReq->variableData[0];
6174   sig4 = lqhKeyReq->variableData[1];
6175 
6176   regTcPtr->fragmentid = LqhKeyReq::getFragmentId(sig0);
6177   regTcPtr->nextReplica = LqhKeyReq::getNextReplicaNodeId(sig0);
6178   regTcPtr->transid[0] = sig1;
6179   regTcPtr->transid[1] = sig2;
6180   regTcPtr->applRef = sig3;
6181   regTcPtr->applOprec = sig4;
6182 
6183   if (LqhKeyReq::getMarkerFlag(Treqinfo))
6184   {
6185     struct CommitAckMarker check;
6186     CommitAckMarkerPtr markerPtr;
6187     jamDebug();
6188     check.transid1 = regTcPtr->transid[0];
6189     check.transid2 = regTcPtr->transid[1];
6190 
6191     if (m_commitAckMarkerHash.find(markerPtr, check))
6192     {
6193       /*
6194         A commit ack marker was already placed here for this transaction.
6195         We increase the reference count to ensure we don't remove the
6196         commit ack marker prematurely.
6197       */
6198       ndbrequire(markerPtr.p->in_hash == true);
6199       ndbrequire(markerPtr.p->reference_count > 0);
6200       markerPtr.p->reference_count++;
6201 #ifdef MARKER_TRACE
6202       ndbout_c("Inc marker[%.8x %.8x] op: %u ref: %u",
6203                markerPtr.p->transid1, markerPtr.p->transid2,
6204                tcConnectptr.i, markerPtr.p->reference_count);
6205 #endif
6206     }
6207     else
6208     {
6209       if (ERROR_INSERTED(5082) ||
6210           unlikely(!m_commitAckMarkerPool.seize(markerPtr)))
6211       {
6212         releaseSections(handle);
6213         earlyKeyReqAbort(signal, lqhKeyReq, isLongReq,
6214                          ZNO_FREE_MARKER_RECORDS_ERROR,
6215                          tcConnectptr);
6216         return;
6217       }
6218       markerPtr.p->transid1 = sig1;
6219       markerPtr.p->transid2 = sig2;
6220       markerPtr.p->apiRef   = sig3;
6221       markerPtr.p->apiOprec = sig4;
6222       markerPtr.p->tcRef = sig5;
6223       markerPtr.p->reference_count = 1;
6224       markerPtr.p->in_hash = true;
6225       markerPtr.p->removed_by_fail_api = false;
6226       m_commitAckMarkerHash.add(markerPtr);
6227 
6228 #ifdef MARKER_TRACE
6229       ndbout_c("%u Add marker[%.8x %.8x] op: %u", instance(), markerPtr.p->transid1, markerPtr.p->transid2, tcConnectptr.i);
6230 #endif
6231     }
6232     regTcPtr->commitAckMarker = markerPtr.i;
6233   }
6234 
6235   regTcPtr->reqinfo = Treqinfo;
6236   regTcPtr->lastReplicaNo = LqhKeyReq::getLastReplicaNo(Treqinfo);
6237   regTcPtr->dirtyOp       = LqhKeyReq::getDirtyFlag(Treqinfo);
6238   regTcPtr->opExec        = LqhKeyReq::getInterpretedFlag(Treqinfo);
6239   regTcPtr->opSimple      = LqhKeyReq::getSimpleFlag(Treqinfo);
6240   regTcPtr->seqNoReplica  = LqhKeyReq::getSeqNoReplica(Treqinfo);
6241   regTcPtr->m_use_rowid   = LqhKeyReq::getRowidFlag(Treqinfo);
6242   regTcPtr->m_dealloc_state     = TcConnectionrec::DA_IDLE;
6243   regTcPtr->m_dealloc_data.m_dealloc_ref_count = RNIL;
6244   {
6245     regTcPtr->operation = (Operation_t) op == ZREAD_EX ? ZREAD : (Operation_t) op;
6246     regTcPtr->lockType =
6247       op == ZREAD_EX ? ZUPDATE :
6248       (Operation_t) op == ZWRITE ? ZINSERT :
6249       (Operation_t) op == ZREFRESH ? ZINSERT :
6250       (Operation_t) op == ZUNLOCK ? ZREAD : // lockType not relevant for unlock req
6251       (Operation_t) op;
6252   }
6253   if (LqhKeyReq::getNoWaitFlag(Treqinfo))
6254   {
6255     /* Check sender version before processing - older versions sent junk */
6256     if (likely(isLongReq &&
6257                senderVersion >= NDBD_NOWAIT_KEYREQ))
6258     {
6259       ndbassert(!regTcPtr->dirtyOp);
6260       ndbrequire((op == ZREAD) || (op == ZREAD_EX)); // For now
6261       regTcPtr->m_flags |= TcConnectionrec::OP_NOWAIT;
6262     }
6263   }
6264 #ifdef VM_TRACE
6265   if (unlikely(isLongReq &&
6266                LqhKeyReq::getLongClearBits(Treqinfo) != 0))
6267   {
6268     jam();
6269     /* Bits set which should not be - definite error on same version */
6270     const Uint32 ownVersion = getNodeInfo(getOwnNodeId()).m_version;
6271     if (senderVersion == ownVersion)
6272     {
6273       jam();
6274       ndbout_c("Received bad long request info %x from same version node %x %x",
6275                Treqinfo,
6276                senderVersion,
6277                ownVersion);
6278       ndbabort();
6279     }
6280   }
6281 #endif
6282 
6283   if (regTcPtr->dirtyOp)
6284   {
6285     ndbrequire(regTcPtr->opSimple);
6286   }
6287 
6288   CRASH_INSERTION2(5041, (op == ZREAD &&
6289                           (regTcPtr->opSimple || regTcPtr->dirtyOp) &&
6290                           refToNode(signal->senderBlockRef()) != cownNodeid));
6291 
6292   regTcPtr->numFiredTriggers = lqhKeyReq->numFiredTriggers;
6293 
6294   UintR TapplAddressInd = LqhKeyReq::getApplicationAddressFlag(Treqinfo);
6295   UintR nextPos = (TapplAddressInd << 1);
6296   UintR TsameClientAndTcOprec = LqhKeyReq::getSameClientAndTcFlag(Treqinfo);
6297   if (TsameClientAndTcOprec == 1) {
6298     regTcPtr->tcOprec = lqhKeyReq->variableData[nextPos];
6299     nextPos++;
6300   }//if
6301   UintR TnextReplicasIndicator = regTcPtr->lastReplicaNo -
6302                                  regTcPtr->seqNoReplica;
6303   if (TnextReplicasIndicator > 1) {
6304     regTcPtr->nodeAfterNext[0] = lqhKeyReq->variableData[nextPos] & 0xFFFF;
6305     regTcPtr->nodeAfterNext[1] = lqhKeyReq->variableData[nextPos] >> 16;
6306     nextPos++;
6307   }//if
6308   UintR TreadLenAiIndicator = LqhKeyReq::getReturnedReadLenAIFlag(Treqinfo);
6309   if (TreadLenAiIndicator == 1) {
6310     regTcPtr->readlenAi = lqhKeyReq->variableData[nextPos] & ZNIL;
6311     nextPos++;
6312   }//if
6313 
6314   Uint32 TanyValueFlag = LqhKeyReq::getCorrFactorFlag(Treqinfo);
6315   if (isLongReq && TanyValueFlag == 1)
6316   {
6317     /**
6318      * For short lqhkeyreq, ai-length in-signal is stored in same pos...
6319      */
6320     regTcPtr->m_corrFactorLo = lqhKeyReq->variableData[nextPos + 0];
6321     regTcPtr->m_corrFactorHi = lqhKeyReq->variableData[nextPos + 1];
6322     nextPos += 2;
6323   }
6324 
6325   regTcPtr->m_fire_trig_pass = 0;
6326   Uint32 Tdeferred = LqhKeyReq::getDeferredConstraints(Treqinfo);
6327   if (isLongReq && Tdeferred)
6328   {
6329     regTcPtr->m_flags |= TcConnectionrec::OP_DEFERRED_CONSTRAINTS;
6330   }
6331 
6332   Uint32 TdisableFk = LqhKeyReq::getDisableFkConstraints(Treqinfo);
6333   if (isLongReq && TdisableFk)
6334   {
6335     regTcPtr->m_flags |= TcConnectionrec::OP_DISABLE_FK;
6336   }
6337 
6338   Uint32 TnormalProtocolFlag = LqhKeyReq::getNormalProtocolFlag(Treqinfo);
6339   if (isLongReq && TnormalProtocolFlag)
6340   {
6341     /**
6342      * Only set normal protocol flag if long request.
6343      * As above, short lqhKeyReq ai-length in-signal overlaps the bit.
6344      * bug#14702377
6345      */
6346     regTcPtr->m_flags |= TcConnectionrec::OP_NORMAL_PROTOCOL;
6347   }
6348 
6349   if (isLongReq && LqhKeyReq::getNoTriggersFlag(Treqinfo))
6350   {
6351     regTcPtr->m_flags |= TcConnectionrec::OP_NO_TRIGGERS;
6352   }
6353 
6354   UintR TitcKeyLen = 0;
6355   Uint32 keyLenWithLQHReq = 0;
6356   UintR TreclenAiLqhkey   = 0;
6357 
6358   if (isLongReq)
6359   {
6360     jamDebug();
6361     /* Long LQHKEYREQ indicates Key and AttrInfo presence and
6362      * size via section lengths
6363      */
6364     SegmentedSectionPtr keyInfoSection, attrInfoSection;
6365 
6366     handle.getSection(keyInfoSection,
6367                       LqhKeyReq::KeyInfoSectionNum);
6368 
6369     ndbassert(keyInfoSection.i != RNIL);
6370 
6371     regTcPtr->keyInfoIVal= keyInfoSection.i;
6372     TitcKeyLen= keyInfoSection.sz;
6373     keyLenWithLQHReq= TitcKeyLen;
6374 
6375     Uint32 totalAttrInfoLen= 0;
6376     if (handle.getSection(attrInfoSection,
6377                           LqhKeyReq::AttrInfoSectionNum))
6378     {
6379       regTcPtr->attrInfoIVal= attrInfoSection.i;
6380       totalAttrInfoLen= attrInfoSection.sz;
6381     }
6382 
6383     regTcPtr->reclenAiLqhkey = 0;
6384     regTcPtr->currReclenAi = totalAttrInfoLen;
6385     regTcPtr->totReclenAi = totalAttrInfoLen;
6386 
6387     /* Detach sections from the handle, we are now responsible
6388      * for freeing them when appropriate
6389      */
6390     handle.clear();
6391   }
6392   else
6393   {
6394     jamDebug();
6395     /* Short LQHKEYREQ, Key and Attr sizes are in
6396      * signal, along with some data
6397      *
6398      * This is still used by RESTORE block for LCP restore.
6399      */
6400     TreclenAiLqhkey= LqhKeyReq::getAIInLqhKeyReq(Treqinfo);
6401     regTcPtr->reclenAiLqhkey = TreclenAiLqhkey;
6402     regTcPtr->currReclenAi = TreclenAiLqhkey;
6403     TitcKeyLen = LqhKeyReq::getKeyLen(Treqinfo);
6404     regTcPtr->totReclenAi = LqhKeyReq::getAttrLen(attrLenFlags);
6405 
6406     /* Note key can be length zero for NR when Rowid used */
6407     keyLenWithLQHReq= MIN(TitcKeyLen, LqhKeyReq::MaxKeyInfo);
6408 
6409     bool ok= appendToSection(regTcPtr->keyInfoIVal,
6410                              &lqhKeyReq->variableData[ nextPos ],
6411                              keyLenWithLQHReq);
6412     if (unlikely(!ok))
6413     {
6414       jam();
6415       earlyKeyReqAbort(signal,
6416                        lqhKeyReq,
6417                        isLongReq,
6418                        ZGET_DATAREC_ERROR,
6419                        tcConnectptr);
6420       return;
6421     }
6422 
6423     nextPos+= keyLenWithLQHReq;
6424   }
6425 
6426   regTcPtr->primKeyLen = TitcKeyLen;
6427 
6428   /* Only node restart copy allowed to send no KeyInfo */
6429   if (unlikely(keyLenWithLQHReq == 0))
6430   {
6431     jamDebug();
6432     /**
6433      * Only allowed use case for no primary key is DELETE by ROWID.
6434      */
6435     if (refToMain(senderRef) == DBSPJ)
6436     {
6437       jam();
6438       ndbassert(! LqhKeyReq::getNrCopyFlag(Treqinfo));
6439 
6440       /* Reply with NO_TUPLE_FOUND */
6441       earlyKeyReqAbort(signal,
6442                        lqhKeyReq,
6443                        isLongReq,
6444                        ZNO_TUPLE_FOUND,
6445                        tcConnectptr);
6446       return;
6447     }
6448 
6449     if (! LqhKeyReq::getNrCopyFlag(Treqinfo))
6450     {
6451       LQHKEY_error(signal, 3);
6452       return;
6453     }//if
6454   }
6455 
6456   sig0 = lqhKeyReq->variableData[nextPos + 0];
6457   sig1 = lqhKeyReq->variableData[nextPos + 1];
6458   regTcPtr->m_row_id.m_page_no = sig0;
6459   regTcPtr->m_row_id.m_page_idx = sig1;
6460   nextPos += 2 * LqhKeyReq::getRowidFlag(Treqinfo);
6461 
6462   jamLineDebug(Uint16(nextPos));
6463 
6464   sig2 = lqhKeyReq->variableData[nextPos + 0];
6465   sig3 = cnewestGci;
6466   /* If gci_hi provided, take it and set gci_lo to max value
6467    * Otherwise, it will be decided by TUP at commit time as normal
6468    */
6469   regTcPtr->gci_hi = LqhKeyReq::getGCIFlag(Treqinfo) ? sig2 : sig3;
6470   regTcPtr->gci_lo = LqhKeyReq::getGCIFlag(Treqinfo) ? ~Uint32(0) : 0;
6471   nextPos += LqhKeyReq::getGCIFlag(Treqinfo);
6472 
6473   if (LqhKeyReq::getRowidFlag(Treqinfo))
6474   {
6475     ndbassert(refToMain(senderRef) != DBTC);
6476   }
6477   else if(op == ZINSERT)
6478   {
6479     ndbassert(refToMain(senderRef) == DBTC);
6480   }
6481 
6482   if (unlikely((LqhKeyReq::FixedSignalLength + nextPos + TreclenAiLqhkey) !=
6483       signal->length()))
6484   {
6485     g_eventLogger->info("nextPos: %u, TreclenAiLqhkey: %u, siglen: %u",
6486                         nextPos, TreclenAiLqhkey, signal->length());
6487     LQHKEY_error(signal, 2);
6488     return;
6489   }//if
6490   UintR TseqNoReplica = regTcPtr->seqNoReplica;
6491   UintR TlastReplicaNo = regTcPtr->lastReplicaNo;
6492   if (TseqNoReplica == TlastReplicaNo) {
6493     jamDebug();
6494     regTcPtr->nextReplica = ZNIL;
6495   } else {
6496     if (TseqNoReplica < TlastReplicaNo) {
6497       jamDebug();
6498       regTcPtr->nextSeqNoReplica = TseqNoReplica + 1;
6499       if ((regTcPtr->nextReplica == 0) ||
6500           (regTcPtr->nextReplica == cownNodeid)) {
6501         LQHKEY_error(signal, 0);
6502       }//if
6503     } else {
6504       LQHKEY_error(signal, 4);
6505       return;
6506     }//if
6507   }//if
6508 
6509   /**
6510    * If this is a 'dirtyOp' we dont care about transaction semantics.
6511    * There will then be no further abort, commit or unlock requests for
6512    * this operation. Thus, we will never have to find this operation
6513    * in the hashlist by calling findTransaction().
6514    * If also all ATTR- and KEYINFOs has been received, there will be no
6515    * ::execKEY- or ATTRINFO. (Long request, or all INFO fit in the REQ.)
6516    *
6517    * Thus we skip insertion in hashlist whenever not required.
6518    */
6519   if (regTcPtr->dirtyOp == ZFALSE ||                  //Transactional operation
6520       regTcPtr->primKeyLen > keyLenWithLQHReq ||      //Await more KEYINFO
6521       regTcPtr->totReclenAi > regTcPtr->currReclenAi) //Await more ATTRINFO
6522   {
6523     jamDebug();
6524     /* Check that no equal element exists */
6525     ndbassert(findTransaction(regTcPtr->transid[0], regTcPtr->transid[1],
6526                               regTcPtr->tcOprec, regTcPtr->tcHashKeyHi,
6527                               tcConnectptr) == ZNOT_FOUND);
6528 
6529     TcConnectionrecPtr localNextTcConnectptr;
6530     Uint32 hashIndex = (regTcPtr->transid[0] ^ regTcPtr->tcOprec) &
6531                        (TRANSID_HASH_SIZE - 1);
6532     localNextTcConnectptr.i = ctransidHash[hashIndex];
6533     ctransidHash[hashIndex] = tcConnectptr.i;
6534     regTcPtr->prevHashRec = RNIL;
6535     regTcPtr->nextHashRec = localNextTcConnectptr.i;
6536     regTcPtr->hashIndex = hashIndex;
6537     if (localNextTcConnectptr.i != RNIL) {
6538       jam();
6539       ndbrequire(tcConnect_pool.getValidPtr(localNextTcConnectptr));
6540       ndbassert(localNextTcConnectptr.p->prevHashRec == RNIL);
6541       localNextTcConnectptr.p->prevHashRec = tcConnectptr.i;
6542     }//if
6543   }//if
6544   if (tabptr.i >= ctabrecFileSize)
6545   {
6546     LQHKEY_error(signal, 5);
6547     return;
6548   }//if
6549   ptrAss(tabptr, tablerec);
6550   if(table_version_major_lqhkeyreq(tabptr.p->schemaVersion) !=
6551      table_version_major_lqhkeyreq(schemaVersion)){
6552     LQHKEY_abort(signal, 5, tcConnectptr);
6553     return;
6554   }
6555 
6556   if (unlikely(tabptr.p->tableStatus != Tablerec::TABLE_DEFINED))
6557   {
6558     if (check_tabstate(signal, tabptr.p, op, tcConnectptr))
6559       return;
6560   }
6561 
6562   regTcPtr->tableref = tabptr.i;
6563   regTcPtr->m_disk_table = tabptr.p->m_disk_table;
6564   if(refToMain(signal->senderBlockRef()) == RESTORE)
6565     regTcPtr->m_disk_table &= !LqhKeyReq::getNoDiskFlag(Treqinfo);
6566   else if(op == ZREAD || op == ZREAD_EX || op == ZUPDATE)
6567     regTcPtr->m_disk_table &= !LqhKeyReq::getNoDiskFlag(Treqinfo);
6568 
6569   if (op == ZREAD || op == ZREAD_EX || op == ZUNLOCK)
6570     tabptr.p->usageCountR++;
6571   else
6572     tabptr.p->usageCountW++;
6573 
6574   if (!getFragmentrec(signal, regTcPtr->fragmentid)) {
6575     LQHKEY_error(signal, 6);
6576     return;
6577   }//if
6578 
6579   if (LqhKeyReq::getNrCopyFlag(Treqinfo) &&
6580       refToMain(senderRef) != RESTORE)
6581   {
6582     ndbassert(refToMain(senderRef) == DBLQH);
6583     ndbassert(LqhKeyReq::getRowidFlag(Treqinfo));
6584     if (! (fragptr.p->fragStatus == Fragrecord::ACTIVE_CREATION))
6585     {
6586       ndbout_c("fragptr.p->fragStatus: %d",
6587 	       fragptr.p->fragStatus);
6588       CRASH_INSERTION(5046);
6589     }
6590     /**
6591      * We discover start of Node recovery phase in starting node
6592      * by seeing the first LQHKEYREQ arrive with getNrCopyFlag set.
6593      * We will set it on every LQHKEYREQ, only the first is really
6594      * needed. We set state to Fragrecord::AC_IGNORED in the
6595      * PREPARE_COPY_FRAGREQ. We could participate in transactions
6596      * even before the first copy row has been received. In this
6597      * case we can safely ignore the row, so this code ensures that
6598      * we won't ignore rows later rows after the first copy row
6599      * has been received. When this row has been received we need
6600      * to check if the UPDATE/DELETEs received from normal transactions
6601      * have to be applied since the row could have arrived before
6602      * the transaction then.
6603      */
6604     ndbassert(fragptr.p->fragStatus == Fragrecord::ACTIVE_CREATION);
6605     fragptr.p->m_copy_started_state = Fragrecord::AC_NR_COPY;
6606 
6607     if (op == ZDELETE)
6608       c_fragCopyRowsDel++;
6609     else
6610       c_fragCopyRowsIns++;
6611 
6612     c_fragBytesCopied+= (signal->length() << 2);
6613   }
6614   else
6615   {
6616     Fragrecord::UsageStat& useStat = fragptr.p->m_useStat;
6617     /* Don't count for NR fragcopy, just 'normal' operation */
6618     switch (op)
6619     {
6620     case ZREAD:
6621     case ZREAD_EX:
6622     case ZUNLOCK:
6623       useStat.m_readKeyReqCount++;
6624       break;
6625 
6626     case ZUPDATE:
6627       useStat.m_updKeyReqCount++;
6628       break;
6629 
6630     case ZINSERT:
6631       useStat.m_insKeyReqCount++;
6632       break;
6633 
6634     case ZWRITE:
6635       useStat.m_writeKeyReqCount++;
6636       break;
6637 
6638     case ZDELETE:
6639       useStat.m_delKeyReqCount++;
6640       break;
6641 
6642     default:
6643       // ZREFRESH is not counted.
6644       break;
6645     }
6646     useStat.m_keyReqAttrWords += regTcPtr->totReclenAi;
6647     useStat.m_keyReqKeyWords += TitcKeyLen;
6648     if (unlikely(LqhKeyReq::getInterpretedFlag(Treqinfo) && isLongReq))
6649     {
6650       /*
6651         Complete attrinfo may not have been received yet for short-signal
6652         lookups. We ignore these, since they only happen during online
6653         upgrade.
6654       */
6655       ndbassert(regTcPtr->attrInfoIVal != RNIL);
6656       SegmentedSectionPtr attrInfo;
6657       getSection(attrInfo, regTcPtr->attrInfoIVal);
6658       useStat.m_keyProgramWords += getProgramWordCount(attrInfo);
6659     }
6660   }
6661 
6662   Uint8 TcopyType = fragptr.p->fragCopy;
6663   Uint32 logPart = fragptr.p->m_log_part_ptr_i;
6664   tfragDistKey = fragptr.p->fragDistributionKey;
6665   if (fragptr.p->fragStatus == Fragrecord::ACTIVE_CREATION) {
6666     jam();
6667     /**
6668      * Starting node in active creation mode, we set activeCreat to
6669      * either AC_IGNORED (before first copy row arrived, or to
6670      * AC_NR_COPY after first copy row arrived. We set activeCreat
6671      * to AC_IGNORED also when we discover that we should ignore
6672      * the row since it updates a row which we haven't received
6673      * a copy row for yet.
6674      */
6675     regTcPtr->activeCreat = fragptr.p->m_copy_started_state;
6676     CRASH_INSERTION(5002);
6677     CRASH_INSERTION2(5042, tabptr.i == c_error_insert_table_id);
6678   } else {
6679     regTcPtr->activeCreat = Fragrecord::AC_NORMAL;
6680   }//if
6681   regTcPtr->replicaType = TcopyType;
6682   regTcPtr->fragmentptr = fragptr.i;
6683   regTcPtr->m_log_part_ptr_i = logPart;
6684   Uint8 TdistKey = LqhKeyReq::getDistributionKey(attrLenFlags);
6685   if ((tfragDistKey != TdistKey) &&
6686       (regTcPtr->seqNoReplica == 0) &&
6687       (regTcPtr->dirtyOp == ZFALSE))
6688   {
6689     /* ----------------------------------------------------------------------
6690      * WE HAVE DIFFERENT OPINION THAN THE DIH THAT STARTED THE TRANSACTION.
6691      * THE REASON COULD BE THAT THIS IS AN OLD DISTRIBUTION WHICH IS NO LONGER
6692      * VALID TO USE. THIS MUST BE CHECKED.
6693      * ONE IS ADDED TO THE DISTRIBUTION KEY EVERY TIME WE ADD A NEW REPLICA.
6694      * FAILED REPLICAS DO NOT AFFECT THE DISTRIBUTION KEY. THIS MEANS THAT THE
6695      * MAXIMUM DEVIATION CAN BE ONE BETWEEN THOSE TWO VALUES.
6696      * --------------------------------------------------------------------- */
6697     Int8 tmp = (TdistKey - tfragDistKey);
6698     tmp = (tmp < 0 ? - tmp : tmp);
6699     if ((tmp <= 1) || (tfragDistKey == 0)) {
6700       LQHKEY_abort(signal, 0, tcConnectptr);
6701       return;
6702     }//if
6703     LQHKEY_error(signal, 1);
6704     // Never get here
6705   }//if
6706 
6707   /*
6708    * Interpreted updates and deletes may require different AttrInfo in
6709    * different replicas, as only the primary executes the interpreted
6710    * program, and the effect of the program rather than the program
6711    * should be logged.
6712    * Non interpreted inserts, updates, writes and deletes use the same
6713    * AttrInfo in all replicas.
6714    * All reads only run on one replica, and are not logged.
6715    * The AttrInfo section is passed to TUP attached to the TUPKEYREQ
6716    * signal below.
6717    *
6718    * Normal processing :
6719    *   - LQH passes ATTRINFO section to TUP attached to direct TUPKEYREQ
6720    *     signal
6721    *   - TUP processes request and sends direct TUPKEYCONF back to LQH
6722    *   - LQH continues processing (logging, forwarding LQHKEYREQ to other
6723    *     replicas as necessary)
6724    *   - LQH frees ATTRINFO section
6725    *   Note that TUP is not responsible for freeing the passed ATTRINFO
6726    *   section, LQH is.
6727    *
6728    * Interpreted Update / Delete processing
6729    *   - LQH passes ATTRINFO section to TUP attached to direct TUPKEYREQ
6730    *     signal
6731    *   - TUP processes request, generating new ATTRINFO data
6732    *   - If new AttrInfo data is > 0 words, TUP sends it back to LQH as
6733    *     a long section attached to a single ATTRINFO signal.
6734    *     - LQH frees the original AttrInfo section and stores a ref to
6735    *       the new section
6736    *   - TUP sends direct TUPKEYCONF back to LQH with new ATTRINFO length
6737    *   - If the new ATTRINFO is > 0 words,
6738    *       - LQH continues processing with it (logging, forwarding
6739    *         LQHKEYREQ to other replicas as necessary)
6740    *       - LQH frees the new ATTRINFO section
6741    *   - If the new ATTRINFO is 0 words, LQH frees the original ATTRINFO
6742    *     section and continues processing (logging, forwarding LQHKEYREQ
6743    *     to other replicas as necessary)
6744    *
6745    */
6746   bool attrInfoToPropagate=
6747     (regTcPtr->totReclenAi != 0) &&
6748     (regTcPtr->operation != ZREAD) &&
6749     (regTcPtr->operation != ZDELETE) &&
6750     (regTcPtr->operation != ZUNLOCK);
6751   bool tupCanChangePropagatedAttrInfo= (regTcPtr->opExec == 1);
6752 
6753   bool saveAttrInfo=
6754     attrInfoToPropagate &&
6755     (! tupCanChangePropagatedAttrInfo);
6756 
6757   if (saveAttrInfo)
6758     regTcPtr->m_flags|= TcConnectionrec::OP_SAVEATTRINFO;
6759 
6760   /* Handle any AttrInfo we received with the LQHKEYREQ */
6761   if (regTcPtr->currReclenAi != 0)
6762   {
6763     jamDebug();
6764     if (isLongReq)
6765     {
6766       /* Long LQHKEYREQ */
6767       jamDebug();
6768 
6769       regTcPtr->currTupAiLen= saveAttrInfo ?
6770         regTcPtr->totReclenAi :
6771         0;
6772     }
6773     else
6774     {
6775       /* Short LQHKEYREQ */
6776       jam();
6777 
6778       /* Lets put the AttrInfo into a segmented section */
6779       bool ok= appendToSection(regTcPtr->attrInfoIVal,
6780                                lqhKeyReq->variableData + nextPos,
6781                                TreclenAiLqhkey);
6782       if (unlikely(!ok))
6783       {
6784         jam();
6785         terrorCode= ZGET_DATAREC_ERROR;
6786         abortErrorLab(signal, tcConnectptr);
6787         return;
6788       }
6789 
6790       if (saveAttrInfo)
6791         regTcPtr->currTupAiLen= TreclenAiLqhkey;
6792     }
6793   }//if
6794 
6795   /* If we've received all KeyInfo, proceed with processing,
6796    * otherwise wait for discrete KeyInfo signals
6797    */
6798   if (regTcPtr->primKeyLen == keyLenWithLQHReq) {
6799     endgettupkeyLab(signal, tcConnectptr);
6800     return;
6801   } else {
6802     jam();
6803     ndbassert(!isLongReq);
6804     /* Wait for remaining KeyInfo */
6805     regTcPtr->save1 = keyLenWithLQHReq;
6806     regTcPtr->transactionState = TcConnectionrec::WAIT_TUPKEYINFO;
6807     return;
6808   }//if
6809   return;
6810 }//Dblqh::execLQHKEYREQ()
6811 
6812 
6813 
6814 /**
6815  * endgettupkeyLab
6816  * Invoked when all KeyInfo and/or all AttrInfo has been
6817  * received
6818  */
endgettupkeyLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)6819 void Dblqh::endgettupkeyLab(Signal* signal,
6820                             const TcConnectionrecPtr tcConnectptr)
6821 {
6822   TcConnectionrec * const regTcPtr = tcConnectptr.p;
6823   if (regTcPtr->totReclenAi == regTcPtr->currReclenAi) {
6824     ;
6825   } else {
6826     jam();
6827     /* Wait for discrete AttrInfo signals */
6828     ndbrequire(regTcPtr->currReclenAi < regTcPtr->totReclenAi);
6829     ndbassert( !(regTcPtr->m_flags &
6830                  TcConnectionrec::OP_ISLONGREQ) );
6831     regTcPtr->transactionState = TcConnectionrec::WAIT_ATTR;
6832     return;
6833   }//if
6834 
6835 /* ---------------------------------------------------------------------- */
6836 /*       NOW RECEPTION OF LQHKEYREQ IS COMPLETED THE NEXT STEP IS TO START*/
6837 /*       PROCESSING THE MESSAGE. IF THE MESSAGE IS TO A STAND-BY NODE     */
6838 /*       WITHOUT NETWORK REDUNDANCY OR PREPARE-TO-COMMIT ACTIVATED THE    */
6839 /*       PREPARATION TO SEND TO THE NEXT NODE WILL START IMMEDIATELY.     */
6840 /*                                                                        */
6841 /*       OTHERWISE THE PROCESSING WILL START AFTER SETTING THE PROPER     */
6842 /*       STATE. HOWEVER BEFORE PROCESSING THE MESSAGE                     */
6843 /*       IT IS NECESSARY TO CHECK THAT THE FRAGMENT IS NOT PERFORMING     */
6844 /*       A CHECKPOINT. THE OPERATION SHALL ALSO BE LINKED INTO THE        */
6845 /*       FRAGMENT QUEUE OR LIST OF ACTIVE OPERATIONS.                     */
6846 /*                                                                        */
6847 /*       THE FIRST STEP IN PROCESSING THE MESSAGE IS TO CONTACT DBACC.    */
6848 /*------------------------------------------------------------------------*/
6849   switch (fragptr.p->fragStatus) {
6850   case Fragrecord::FSACTIVE:
6851   case Fragrecord::CRASH_RECOVERING:
6852   case Fragrecord::ACTIVE_CREATION:
6853     prepareContinueAfterBlockedLab(signal, tcConnectptr);
6854     return;
6855     break;
6856   case Fragrecord::FREE:
6857     ndbabort();
6858   case Fragrecord::DEFINED:
6859     ndbabort();
6860   case Fragrecord::REMOVING:
6861     ndbabort();
6862   default:
6863     ndbabort();
6864   }//switch
6865   return;
6866 }//Dblqh::endgettupkeyLab()
6867 
prepareContinueAfterBlockedLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)6868 void Dblqh::prepareContinueAfterBlockedLab(
6869                 Signal* signal,
6870                 const TcConnectionrecPtr tcConnectptr)
6871 {
6872   UintR ttcScanOp;
6873 
6874 /* -------------------------------------------------------------------------- */
6875 /*       INPUT:          TC_CONNECTPTR           ACTIVE CONNECTION RECORD     */
6876 /*                       FRAGPTR                 FRAGMENT RECORD              */
6877 /* -------------------------------------------------------------------------- */
6878 /* -------------------------------------------------------------------------- */
6879 /*  CONTINUE HERE AFTER BEING BLOCKED FOR A WHILE DURING LOCAL CHECKPOINT.    */
6880 /* -------------------------------------------------------------------------- */
6881 /*       ALSO AFTER NORMAL PROCEDURE WE CONTINUE HERE                         */
6882 /* -------------------------------------------------------------------------- */
6883   Uint32 tc_ptr_i = tcConnectptr.i;
6884   TcConnectionrec * const regTcPtr = tcConnectptr.p;
6885   Uint32 activeCreat = regTcPtr->activeCreat;
6886   if (regTcPtr->operation == ZUNLOCK)
6887   {
6888     jam();
6889     handleUserUnlockRequest(signal, tcConnectptr);
6890     return;
6891   }
6892 
6893   if (regTcPtr->indTakeOver == ZTRUE) {
6894     jam();
6895     ttcScanOp = KeyInfo20::getScanOp(regTcPtr->tcScanInfo);
6896     scanptr.i = RNIL;
6897     {
6898       ScanRecord key;
6899       key.scanNumber = KeyInfo20::getScanNo(regTcPtr->tcScanInfo);
6900       key.fragPtrI = fragptr.i;
6901       c_scanTakeOverHash.find(scanptr, key);
6902 #ifdef TRACE_SCAN_TAKEOVER
6903       if(scanptr.i == RNIL)
6904 	ndbout_c("not finding (%d %d)", key.scanNumber, key.fragPtrI);
6905 #endif
6906     }
6907     if (scanptr.i == RNIL) {
6908       jam();
6909       takeOverErrorLab(signal, tcConnectptr);
6910       return;
6911     }//if
6912     regTcPtr->accOpPtr= get_acc_ptr_from_scan_record(scanptr.p,
6913                                                      ttcScanOp,
6914                                                      true);
6915     if (regTcPtr->accOpPtr == RNIL) {
6916       jam();
6917       takeOverErrorLab(signal, tcConnectptr);
6918       return;
6919     }//if
6920   }//if
6921 /*-------------------------------------------------------------------*/
6922 /*       IT IS NOW TIME TO CONTACT ACC. THE TUPLE KEY WILL BE SENT   */
6923 /*       AND THIS WILL BE TRANSLATED INTO A LOCAL KEY BY USING THE   */
6924 /*       LOCAL PART OF THE LH3-ALGORITHM. ALSO PROPER LOCKS ON THE   */
6925 /*       TUPLE WILL BE SET. FOR INSERTS AND DELETES THE MESSAGE WILL */
6926 /*       START AN INSERT/DELETE INTO THE HASH TABLE.                 */
6927 /*                                                                   */
6928 /*       BEFORE SENDING THE MESSAGE THE REQUEST INFORMATION IS SET   */
6929 /*       PROPERLY.                                                   */
6930 /* ----------------------------------------------------------------- */
6931   if (TRACENR_FLAG)
6932   {
6933     TRACE_OP(regTcPtr, "RECEIVED");
6934     switch (regTcPtr->operation) {
6935     case ZREAD: TRACENR("READ"); break;
6936     case ZUPDATE: TRACENR("UPDATE"); break;
6937     case ZWRITE: TRACENR("WRITE"); break;
6938     case ZINSERT: TRACENR("INSERT"); break;
6939     case ZDELETE: TRACENR("DELETE"); break;
6940     case ZUNLOCK: TRACENR("UNLOCK"); break;
6941     case ZREFRESH: TRACENR("REFRESH"); break;
6942     default: TRACENR("<Unknown: " << regTcPtr->operation << ">"); break;
6943     }
6944 
6945     TRACENR(" tab: " << regTcPtr->tableref
6946 	   << " frag: " << regTcPtr->fragmentid
6947 	   << " activeCreat: " << (Uint32)activeCreat);
6948     if (LqhKeyReq::getNrCopyFlag(regTcPtr->reqinfo))
6949       TRACENR(" NrCopy");
6950     if (LqhKeyReq::getRowidFlag(regTcPtr->reqinfo))
6951       TRACENR(" rowid: " << regTcPtr->m_row_id);
6952     TRACENR(" key: " << getKeyInfoWordOrZero(regTcPtr, 0));
6953   }
6954 
6955   if (likely(activeCreat == Fragrecord::AC_NORMAL))
6956   {
6957     if (TRACENR_FLAG)
6958       TRACENR(endl);
6959     if (!LqhKeyReq::getNrCopyFlag(regTcPtr->reqinfo))
6960     {
6961       /* Normal path */
6962       exec_acckeyreq(signal, tcConnectptr);
6963     }
6964     else
6965     {
6966       jam();
6967       /**
6968        * Delete by ROWID from RESTORE
6969        */
6970       ndbrequire(LqhKeyReq::getRowidFlag(regTcPtr->reqinfo));
6971       ndbrequire(regTcPtr->operation == ZDELETE);
6972       handle_nr_copy(signal, tcConnectptr);
6973     }
6974   }
6975   else if (activeCreat == Fragrecord::AC_NR_COPY)
6976   {
6977     /* Node restart do not use scan lock take over */
6978     /**
6979      * This is always the code path taken after the first copy row has
6980      * arrived, both for copy rows and for normal transactions. We are
6981      * in the starting node and the fragment isn't yet up to date, so we
6982      * need to be careful with all variants of how we deal with synching
6983      * this starting fragment with the live fragment.
6984      */
6985     ndbrequire(!regTcPtr->indTakeOver);
6986     regTcPtr->totSendlenAi = regTcPtr->totReclenAi;
6987     handle_nr_copy(signal, tcConnectptr);
6988   }
6989   else
6990   {
6991     /* Aborts can not use scan lock take over.
6992      * And scan lock take over can not be aborted.
6993      *
6994      * First copy row hasn't arrived yet, we will ignore any row updates,
6995      * but to the other nodes we will act as if we have applied the
6996      * changes.
6997      */
6998     ndbrequire(!regTcPtr->indTakeOver);
6999     ndbassert(activeCreat == Fragrecord::AC_IGNORED);
7000     if (TRACENR_FLAG)
7001       TRACENR(" IGNORING (activeCreat == 2)" << endl);
7002 
7003     signal->theData[0] = tc_ptr_i;
7004     regTcPtr->transactionState = TcConnectionrec::WAIT_ACC_ABORT;
7005 
7006     signal->theData[0] = regTcPtr->tupConnectrec;
7007     EXECUTE_DIRECT(DBTUP, GSN_TUP_ABORTREQ, signal, 1);
7008     jamEntry();
7009 
7010     regTcPtr->totSendlenAi = regTcPtr->totReclenAi;
7011     packLqhkeyreqLab(signal, tcConnectptr);
7012   }
7013 }
7014 
7015 void
exec_acckeyreq(Signal * signal,TcConnectionrecPtr regTcPtr)7016 Dblqh::exec_acckeyreq(Signal* signal, TcConnectionrecPtr regTcPtr)
7017 {
7018 /* ************ */
7019 /*  ACCKEYREQ < */
7020 /* ************ */
7021   prefetch_op_record_3((Uint32*)regTcPtr.p->accConnectPtrP);
7022   {
7023     Uint32 taccreq = 0;
7024     taccreq = AccKeyReq::setOperation(taccreq, regTcPtr.p->operation);
7025     taccreq = AccKeyReq::setLockType(taccreq, regTcPtr.p->lockType);
7026     taccreq = AccKeyReq::setDirtyOp(taccreq, regTcPtr.p->dirtyOp);
7027     taccreq = AccKeyReq::setReplicaType(taccreq, regTcPtr.p->replicaType);
7028     taccreq = AccKeyReq::setTakeOver(taccreq, regTcPtr.p->indTakeOver);
7029     taccreq = AccKeyReq::setNoWait(taccreq,
7030                                    ((regTcPtr.p->m_flags & TcConnectionrec::OP_NOWAIT) != 0));
7031     taccreq = AccKeyReq::setLockReq(taccreq, false);
7032 
7033     AccKeyReq * const req = reinterpret_cast<AccKeyReq*>(&signal->theData[0]);
7034     req->fragmentPtr = fragptr.p->accFragptr;
7035     req->requestInfo = taccreq;
7036     req->hashValue = regTcPtr.p->hashValue;
7037     req->keyLen = regTcPtr.p->primKeyLen;
7038     req->transId1 = regTcPtr.p->transid[0];
7039     req->transId2 = regTcPtr.p->transid[1];
7040     req->lockConnectPtr = regTcPtr.p->indTakeOver ? regTcPtr.p->accOpPtr : RNIL;
7041     ndbrequire(req->keyLen > 0);
7042     memcpy(req->keyInfo, &req, AccKeyReq::SignalLength_keyInfo);
7043 
7044     regTcPtr.p->transactionState = TcConnectionrec::WAIT_ACC;
7045 
7046     /* Copy KeyInfo to end of ACCKEYREQ signal, starting at offset 7 */
7047     copy(req->keyInfo, regTcPtr.p->keyInfoIVal);
7048     NDB_STATIC_ASSERT(AccKeyReq::SignalLength_keyInfo == 8);
7049   }
7050   TRACE_OP(regTcPtr.p, "ACC");
7051 
7052   signal->setLength(AccKeyReq::SignalLength_keyInfo + regTcPtr.p->primKeyLen);
7053   c_acc->execACCKEYREQ(signal,
7054                        regTcPtr.p->accConnectrec,
7055                        regTcPtr.p->accConnectPtrP);
7056   jamEntryDebug();
7057   m_tc_connect_ptr = regTcPtr;
7058   if (signal->theData[0] < RNIL) {
7059     jamDebug();
7060     continueACCKEYCONF(signal,
7061                        signal->theData[3],
7062                        signal->theData[4],
7063                        regTcPtr);
7064     return;
7065   } else if (signal->theData[0] == RNIL) {
7066     ;
7067   } else {
7068     ndbrequire(signal->theData[0] == (UintR)-1);
7069     if (signal->theData[1] == ZTO_OP_STATE_ERROR) /* Dbacc scan take over error */
7070     {
7071       execACC_TO_REF(signal, regTcPtr);
7072     }
7073     else
7074     {
7075       signal->theData[0] = regTcPtr.i;
7076       execACCKEYREF(signal);
7077     }
7078   }//if
7079   return;
7080 }//Dblqh::prepareContinueAfterBlockedLab()
7081 
/**
 * handle_nr_copy
 *
 * Executes an LQHKEYREQ against a fragment that is being synchronised
 * during node restart, and DELETE by ROWID requests carrying the NrCopy
 * flag (which, per the comments below, may also come from RESTORE).
 *
 * If the request carries no row id, the operation is switched to
 * AC_NORMAL and run through exec_acckeyreq(). Otherwise the primary key
 * currently stored at the given row id is read from TUP into the scratch
 * area signal->theData+24 and compared with the key of the request. The
 * function then ends in one of three ways:
 *
 *   run               : exec_acckeyreq() is called, possibly after the
 *                       operation type was changed and after conflicting
 *                       rows were removed with nr_copy_delete_row().
 *   ignore /
 *   update_gci_ignore : the operation is marked AC_IGNORED, its TUP part is
 *                       aborted and packLqhkeyreqLab() is called.
 *   DELETE by ROWID on an existing row: the row is deleted here and
 *                       packLqhkeyreqLab() is called directly, unless more
 *                       nr-delete work is still outstanding for the
 *                       operation (m_nr_delete.m_cnt != 0), in which case
 *                       the function just returns.
 *
 * @param signal    Signal buffer; words from index 24 are used as scratch
 *                  space for the primary key read from TUP.
 * @param regTcPtr  The operation record. The fragment is taken from the
 *                  block variable fragptr.
 */
void
Dblqh::handle_nr_copy(Signal* signal, Ptr<TcConnectionrec> regTcPtr)
{
  jam();
  Uint32 fragPtr = fragptr.p->tupFragptr;
  Uint32 op = regTcPtr.p->operation;

  const bool nrCopyFlag = LqhKeyReq::getNrCopyFlag(regTcPtr.p->reqinfo);

  if (!LqhKeyReq::getRowidFlag(regTcPtr.p->reqinfo))
  {
    /**
     * Rowid not set, that means that primary has finished copying...
     * This effectively means that our fragment is up-to-date and
     * synchronised with the primary replica. There is still work
     * needed to make the fragment durable, but from the point of
     * view of executing LQHKEYREQ we're a normal fragment now.
     */
    jam();
    if (TRACENR_FLAG)
      TRACENR(" Waiting for COPY_ACTIVEREQ" << endl);
    ndbassert(!LqhKeyReq::getNrCopyFlag(regTcPtr.p->reqinfo));
    regTcPtr.p->activeCreat = Fragrecord::AC_NORMAL;
    exec_acckeyreq(signal, regTcPtr);
    return;
  }

  /* Signal header was counted for when receiving LQHKEYREQ */
  /* Key words plus AttrInfo words (if any), shifted by 2 to get bytes */
  c_fragBytesCopied += ((regTcPtr.p->primKeyLen +
                         ((regTcPtr.p->attrInfoIVal == RNIL)? 0 :
                          getSectionSz(regTcPtr.p->attrInfoIVal))) << 2);

  regTcPtr.p->m_nr_delete.m_cnt = 1; // Wait for real op as well
  /* Scratch area for the key stored at the row id. nr_copy_delete_row()
   * reads the key back from this same location when it deletes by row id. */
  Uint32* dst = signal->theData+24;
  bool uncommitted;
  const int len = c_tup->nr_read_pk(fragPtr, &regTcPtr.p->m_row_id, dst,
				    uncommitted);
  const bool match = (len>0) ? compare_key(regTcPtr.p, dst, len) == 0 : false;

  if (TRACENR_FLAG)
    TRACENR(" len: " << len << " match: " << match
	   << " uncommitted: " << uncommitted);

  /**
   * len == 0 here means that the row id had no record attached to it.
   * len > 0 means that we returned a primary key from nr_read_pk.
   * len == 0 >> match = false
   *
   * DELETE by ROWID means regTcPtr.p->primKeyLen is 0 and thus compare_key
   * will not return true and thus match = false
   *
   * When len > 0 we will check if the primary key sent from the live node
   * is equal to the primary key we store here. If it is equal match = true
   * otherwise match = false
   *
   * The DELETE by ROWID case is reused also for delete row from RESTORE when
   * restoring changes in an LCP. In this we set the NrCopyFlag.
   */
  TcConnectionrecPtr tcConnectptr = regTcPtr;
  if (nrCopyFlag)
  {
    /**
     * This is a copy row sent from live node to starting node.
     * It is either an INSERT with the full row and with row id.
     * Otherwise it is a DELETE by ROWID without primary key.
     * This signal comes with the GCI set on the row at the primary
     * replica.
     *
     * It can also be a DELETE_BY_ROWID sent from RESTORE.
     * In this case the operation is always DELETE.
     */
    ndbassert(LqhKeyReq::getGCIFlag(regTcPtr.p->reqinfo));
    if (match)
    {
      /**
       * Case 1
       * ------
       * An INSERT is used to copy the row from the live node to the
       * starting node. The starting node already had the row and the
       * primary key was correct. So we simply translate the INSERT
       * into an UPDATE and perform the update. After this the row
       * is up to date.
       */
      jam();
      ndbrequire(op == ZINSERT);
      if (TRACENR_FLAG)
	TRACENR(" Changing from INSERT to ZUPDATE" << endl);
      regTcPtr.p->operation = ZUPDATE;
      goto run;
    }
    else if (op == ZDELETE)
    {
      ndbrequire(regTcPtr.p->primKeyLen == 0);
      if (len > 0)
      {
        /**
         * Case 4
         * ------
         *   We are performing DELETE by ROWID and the row id holds an
         *   existing row, we need to delete the row in this position.
         */
        jam();
        if (TRACENR_FLAG)
	  TRACENR(" performing DELETE key: "
	         << dst[0] << endl);

        if (refToMain(regTcPtr.p->tcBlockref) == RESTORE)
        {
          jam();
          c_restore->delete_by_rowid_succ(regTcPtr.p->tcOprec);
        }
        DEB_LCP_RESTORE(("(%u)tab(%u,%u) row(%u,%u), set GCI = %u",
                 instance(),
                 regTcPtr.p->tableref,
                 regTcPtr.p->fragmentid,
                 regTcPtr.p->m_row_id.m_page_no,
                 regTcPtr.p->m_row_id.m_page_idx,
                 regTcPtr.p->gci_hi));
        c_tup->nr_update_gci(fragPtr,
                             &regTcPtr.p->m_row_id,
                             regTcPtr.p->gci_hi,
                             true);
        nr_copy_delete_row(signal, regTcPtr, &regTcPtr.p->m_row_id, len);
        ndbassert(regTcPtr.p->m_nr_delete.m_cnt);
        regTcPtr.p->m_nr_delete.m_cnt--; // No real op is run
        if (regTcPtr.p->m_nr_delete.m_cnt)
        {
	  jam();
          /* Only happens with disk data in copy fragment phase */
          ndbrequire(regTcPtr.p->activeCreat == Fragrecord::AC_NR_COPY);
	  return;
        }
        packLqhkeyreqLab(signal, regTcPtr);
        return;
      }
      else if (len == 0 && op == ZDELETE)
      {
        /**
         * Case 7
         * ------
         * We are performing a DELETE by ROWID and there was no row at this
         * row id. We set the correct GCI in this row id.
         */
        jam();
        if (TRACENR_FLAG)
	  TRACENR(" UPDATE_GCI" << endl);
        if (refToMain(regTcPtr.p->tcBlockref) == RESTORE)
        {
          jam();
          c_restore->delete_by_rowid_fail(regTcPtr.p->tcOprec);
        }
        c_tup->nr_update_gci(fragPtr,
                             &regTcPtr.p->m_row_id,
                             regTcPtr.p->gci_hi,
                             false);
        goto update_gci_ignore;
      }
    }
    /* !match && op != ZDELETE */

    /**
     * If we come here we are receiving a copy row (an INSERT), the
     * row id position either had an existing row at this position or not,
     * but if it had it has a different primary key.
     *
     * Perform the following action:
     * 1) Delete row at specified rowid (if len > 0)
     * 2) Delete specified row at different rowid (if exists)
     * 3) Run insert
     */
    if (len > 0)
    {
      /**
       * 1) Delete row at specified rowid (if len > 0)
       * A row existed but it was different so we delete the row at this
       * row id position.
       */
      jam();
      nr_copy_delete_row(signal, regTcPtr, &regTcPtr.p->m_row_id, len);
    }
    /**
     * 2) Delete specified row at different rowid (if exists)
     * It is technically possible that a row with the same primary key
     * also exists. This record then has a different row id. This is an
     * interesting case which can happen if the given primary key was
     * deleted and then later inserted again. We have to handle this case
     * now even though it would be handled later as the hash index is
     * unique and cannot have two records with the same primary key.
     *
     * We will soon reinsert a record with this primary key, so the primary
     * key is simply moved to another row id. The row id it is currently
     * placed should have a higher row id since the copy process goes from
     * low row ids to higher row ids.
     */
    jam();
    nr_copy_delete_row(signal, regTcPtr, 0, 0);
    if (TRACENR_FLAG)
      TRACENR(" RUN INSERT" << endl);
    goto run;
  }
  else
  {
    /**
     * nrCopyFlag == false
     * This is a normal operation in a starting node which is currently being
     * synchronised with the live node.
     */

    /**
     * match used for NrCopy operations above is based on a binary
     * comparison, but char keys with certain collations can be
     * equivalent but not binary equal.
     * We check for this case now if no match found so far
     * This assumes that there is no case where
     * binary equality does not imply collation equality.
     */
    bool xfrmMatch = match;
    const Uint32 tableId = regTcPtr.p->tableref;
    if (!match &&
        g_key_descriptor_pool.getPtr(tableId)->hasCharAttr)
    {
      Uint64 reqKey[ MAX_KEY_SIZE_IN_WORDS >> 1 ];
      Uint64 dbXfrmKey[ (MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1 ];
      Uint64 reqXfrmKey[ (MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1 ];
      Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX];

      jam();

      /* Transform db table key read from DB above into dbXfrmKey */
      const int dbXfrmKeyLen = xfrm_key_hash(tableId,
                                             &signal->theData[24],
                                             (Uint32*)dbXfrmKey,
                                             sizeof(dbXfrmKey) >> 2,
                                             keyPartLen);

      /* Copy request key into linear space */
      copy((Uint32*) reqKey, regTcPtr.p->keyInfoIVal);

      /* Transform request key */
      const int reqXfrmKeyLen = xfrm_key_hash(tableId,
                                              (Uint32*)reqKey,
                                              (Uint32*)reqXfrmKey,
                                              sizeof(reqXfrmKey) >> 2,
                                              keyPartLen);
      /* Check for a match between the xfrmd keys */
      if (dbXfrmKeyLen > 0 &&
          dbXfrmKeyLen == reqXfrmKeyLen)
      {
        jam();
        /* Binary compare xfrm'd representations */
        xfrmMatch = (memcmp(dbXfrmKey, reqXfrmKey, dbXfrmKeyLen << 2) == 0);
      }
    }

    if (!xfrmMatch && op != ZINSERT)
    {
      /**
       * We are performing an UPDATE or a DELETE and the row id position
       * doesn't contain the correct primary key.
       *
       * Either there was no row in this row id, or it is an old row
       * which hasn't yet seen the copy row. We can safely ignore this
       * one.
       */
      jam();
      if (TRACENR_FLAG)
	TRACENR(" IGNORE " << endl);
      goto ignore;
    }
    if (xfrmMatch)
    {
      /**
       * An INSERT/UPDATE/DELETE/REFRESH on a record where we have the correct
       * primary key in this row id position. We convert the INSERT to a write
       * to speed things up a bit rather than first deleting row and then
       * inserting it. UPDATE is also converted to WRITE, but this has no real
       * effect when the row is already there.
       */
      jam();
      if (op != ZDELETE && op != ZREFRESH)
      {
	if (TRACENR_FLAG)
	  TRACENR(" Changing from INSERT/UPDATE to ZWRITE" << endl);
	regTcPtr.p->operation = ZWRITE;
      }
      goto run;
    }

    /**
     * This is a normal operation that does an insert in a row id position
     * which either has a different primary key or no record in the row
     * id position.
     *
     * We cannot ignore this one. If it is inserted before the current row
     * id position in the live node, then we will not see any copy row for
     * this row. Since we don't know we will perform the insert now in the
     * same manner as if it was a copy row coming. It might be redone later
     * but this is not a problem with consistency.
     */
    ndbassert(!xfrmMatch && op == ZINSERT);

    /**
     * Perform the following action (same as above for copy row case)
     * 1) Delete row at specified rowid (if len > 0)
     * 2) Delete specified row at different rowid (if exists)
     * 3) Run insert
     */
    if (len > 0)
    {
      /**
       * 1) Delete row at specified rowid (if len > 0)
       */
      jam();
      nr_copy_delete_row(signal, regTcPtr, &regTcPtr.p->m_row_id, len);
    }

    /**
     * 2) Delete specified row at different rowid (if exists)
     */
    jam();
    nr_copy_delete_row(signal, regTcPtr, 0, 0);
    if (TRACENR_FLAG)
      TRACENR(" RUN op: " << op << endl);
    goto run;
  }

run:
  /* Execute the (possibly converted) operation through ACC. */
  jam();
  exec_acckeyreq(signal, regTcPtr);
  return;

ignore:
  jam();
  ndbassert(!LqhKeyReq::getNrCopyFlag(regTcPtr.p->reqinfo));
update_gci_ignore:
  /* The operation is not applied locally: mark it ignored, abort its TUP
   * part and continue with packLqhkeyreqLab(). */
  regTcPtr.p->activeCreat = Fragrecord::AC_IGNORED;
  signal->theData[0] = regTcPtr.p->tupConnectrec;
  EXECUTE_DIRECT(DBTUP, GSN_TUP_ABORTREQ, signal, 1);

  packLqhkeyreqLab(signal, tcConnectptr);
}
7423 
7424 /**
7425  * Compare received key data with the data supplied
7426  * returning 0 if they are the same, 1 otherwise
7427  */
7428 int
compare_key(const TcConnectionrec * regTcPtr,const Uint32 * ptr,Uint32 len)7429 Dblqh::compare_key(const TcConnectionrec* regTcPtr,
7430 		   const Uint32 * ptr, Uint32 len)
7431 {
7432   if (regTcPtr->primKeyLen != len)
7433     return 1;
7434 
7435   ndbassert( regTcPtr->keyInfoIVal != RNIL );
7436 
7437   SectionReader keyInfoReader(regTcPtr->keyInfoIVal,
7438                               getSectionSegmentPool());
7439 
7440   ndbassert(regTcPtr->primKeyLen == keyInfoReader.getSize());
7441 
7442   while (len != 0)
7443   {
7444     const Uint32* keyChunk= NULL;
7445     Uint32 chunkSize= 0;
7446 
7447     /* Get a ptr to a chunk of contiguous words to compare */
7448     bool ok= keyInfoReader.getWordsPtr(len, keyChunk, chunkSize);
7449 
7450     ndbrequire(ok);
7451 
7452     if ( memcmp(ptr, keyChunk, chunkSize << 2))
7453       return 1;
7454 
7455     ptr+= chunkSize;
7456     len-= chunkSize;
7457   }
7458 
7459   return 0;
7460 }
7461 
7462 void
nr_copy_delete_row(Signal * signal,Ptr<TcConnectionrec> regTcPtr,Local_key * rowid,Uint32 len)7463 Dblqh::nr_copy_delete_row(Signal* signal,
7464 			  Ptr<TcConnectionrec> regTcPtr,
7465 			  Local_key* rowid, Uint32 len)
7466 {
7467   Ptr<Fragrecord> fragPtr = fragptr;
7468 
7469   Uint32 tableId = regTcPtr.p->tableref;
7470   Uint32 siglen;
7471 
7472   prefetch_op_record_3((Uint32*)regTcPtr.p->accConnectPtrP);
7473 
7474   Uint32 accreq = 0;
7475   accreq = AccKeyReq::setOperation(accreq, ZDELETE);
7476   accreq = AccKeyReq::setLockType(accreq, ZDELETE);
7477   accreq = AccKeyReq::setDirtyOp(accreq, false);
7478   accreq = AccKeyReq::setReplicaType(accreq, 0); // ?
7479   accreq = AccKeyReq::setTakeOver(accreq, false);
7480   accreq = AccKeyReq::setLockReq(accreq, false);
7481 
7482   AccKeyReq * const req = reinterpret_cast<AccKeyReq*>(&signal->theData[0]);
7483   req->fragmentPtr = fragptr.p->accFragptr;
7484   req->requestInfo = accreq;
7485   req->transId1 = regTcPtr.p->transid[0];
7486   req->transId2 = regTcPtr.p->transid[1];
7487   req->lockConnectPtr = RNIL;
7488 
7489   if (rowid)
7490   {
7491     jam();
7492     if (g_key_descriptor_pool.getPtr(tableId)->hasCharAttr)
7493     {
7494       req->hashValue = calculateHash(tableId, signal->theData+24);
7495     }
7496     else
7497     {
7498       req->hashValue = md5_hash((Uint64*)(signal->theData+24), len);
7499     }
7500     req->keyLen = 0; // search by local key
7501     req->localKey[0] = rowid->m_page_no;
7502     req->localKey[1] = rowid->m_page_idx;
7503     siglen = AccKeyReq::SignalLength_localKey;
7504     NDB_STATIC_ASSERT(AccKeyReq::SignalLength_localKey == 10);
7505   }
7506   else
7507   {
7508     jam();
7509     Uint32 keylen = regTcPtr.p->primKeyLen;
7510     req->hashValue = regTcPtr.p->hashValue;
7511     req->keyLen = keylen;
7512 
7513     /* Copy KeyInfo inline into the ACCKEYREQ signal,
7514      * starting at word 7
7515      */
7516     copy(req->keyInfo, regTcPtr.p->keyInfoIVal);
7517     siglen = AccKeyReq::SignalLength_keyInfo + keylen;
7518     NDB_STATIC_ASSERT(AccKeyReq::SignalLength_keyInfo == 8);
7519   }
7520   signal->setLength(siglen);
7521   c_acc->execACCKEYREQ(signal,
7522                        regTcPtr.p->accConnectrec,
7523                        regTcPtr.p->accConnectPtrP);
7524   jamEntry();
7525 
7526   Uint32 retValue = signal->theData[0];
7527   ndbrequire(retValue != RNIL); // This should never block...
7528 
7529   if (retValue == (Uint32)-1)
7530   {
7531     /**
7532      * Only delete by pk, may fail
7533      */
7534     jam();
7535     ndbrequire(rowid == 0);
7536     c_acc->execACC_ABORTREQ(signal,
7537                             regTcPtr.p->accConnectrec,
7538                             regTcPtr.p->accConnectPtrP,
7539                             0);
7540     jamEntry();
7541     return;
7542   }
7543 
7544   /**
7545    * We found row (and have it locked in ACC)
7546    */
7547   ndbrequire(regTcPtr.p->m_dealloc_state == TcConnectionrec::DA_IDLE);
7548   ndbrequire(regTcPtr.p->m_dealloc_data.m_dealloc_ref_count == RNIL);
7549   Local_key save = regTcPtr.p->m_row_id;
7550 
7551   c_acc->execACCKEY_ORD(signal,
7552                         regTcPtr.p->accConnectrec,
7553                         regTcPtr.p->accConnectPtrP);
7554   c_acc->execACC_COMMITREQ(signal,
7555                            regTcPtr.p->accConnectrec,
7556                            regTcPtr.p->accConnectPtrP);
7557   jamEntry();
7558 
7559   ndbrequire(regTcPtr.p->m_dealloc_state == TcConnectionrec::DA_DEALLOC_COUNT);
7560   ndbrequire(regTcPtr.p->m_dealloc_data.m_dealloc_ref_count == 1);
7561   int ret = c_tup->nr_delete(signal, regTcPtr.i,
7562 			     fragPtr.p->tupFragptr, &regTcPtr.p->m_row_id,
7563 			     regTcPtr.p->gci_hi);
7564   jamEntry();
7565 
7566   if (ret)
7567   {
7568     ndbassert(ret == 1);
7569     ndbrequire(regTcPtr.p->activeCreat == Fragrecord::AC_NR_COPY);
7570     Uint32 pos = regTcPtr.p->m_nr_delete.m_cnt - 1;
7571     memcpy(regTcPtr.p->m_nr_delete.m_disk_ref + pos,
7572 	   signal->theData, sizeof(Local_key));
7573     regTcPtr.p->m_nr_delete.m_page_id[pos] = RNIL;
7574     regTcPtr.p->m_nr_delete.m_cnt = pos + 2;
7575     if (0) ndbout << "PENDING DISK DELETE: " <<
7576       regTcPtr.p->m_nr_delete.m_disk_ref[pos] << endl;
7577   }
7578 
7579   TRACENR("DELETED: " << regTcPtr.p->m_row_id << endl);
7580 
7581   regTcPtr.p->m_dealloc_state = TcConnectionrec::DA_IDLE;
7582   regTcPtr.p->m_dealloc_data.m_dealloc_ref_count = RNIL;
7583   regTcPtr.p->m_row_id = save;
7584   fragptr = fragPtr;
7585 }
7586 
7587 void
get_nr_op_info(Nr_op_info * op,Uint32 page_id)7588 Dblqh::get_nr_op_info(Nr_op_info* op, Uint32 page_id)
7589 {
7590   Ptr<TcConnectionrec> tcPtr;
7591   tcPtr.i = op->m_ptr_i;
7592 
7593   ndbrequire(tcConnect_pool.getValidPtr(tcPtr));
7594   Ptr<Fragrecord> fragPtr;
7595   c_fragment_pool.getPtr(fragPtr, tcPtr.p->fragmentptr);
7596 
7597   op->m_gci_hi = tcPtr.p->gci_hi;
7598   op->m_gci_lo = tcPtr.p->gci_lo;
7599   op->m_row_id = tcPtr.p->m_row_id;
7600   op->m_tup_frag_ptr_i = fragPtr.p->tupFragptr;
7601 
7602   ndbrequire(tcPtr.p->activeCreat == Fragrecord::AC_NR_COPY);
7603   ndbrequire(tcPtr.p->m_nr_delete.m_cnt);
7604 
7605 
7606   if (page_id == RNIL)
7607   {
7608     // get log buffer callback
7609     for (Uint32 i = 0; i<2; i++)
7610     {
7611       if (tcPtr.p->m_nr_delete.m_page_id[i] != RNIL)
7612       {
7613 	op->m_page_id = tcPtr.p->m_nr_delete.m_page_id[i];
7614 	op->m_disk_ref = tcPtr.p->m_nr_delete.m_disk_ref[i];
7615 	return;
7616       }
7617     }
7618   }
7619   else
7620   {
7621     // get page callback
7622     for (Uint32 i = 0; i<2; i++)
7623     {
7624       Local_key key = tcPtr.p->m_nr_delete.m_disk_ref[i];
7625       if (op->m_disk_ref.m_page_no == key.m_page_no &&
7626 	  op->m_disk_ref.m_file_no == key.m_file_no &&
7627 	  tcPtr.p->m_nr_delete.m_page_id[i] == RNIL)
7628       {
7629 	op->m_disk_ref = key;
7630 	tcPtr.p->m_nr_delete.m_page_id[i] = page_id;
7631 	return;
7632       }
7633     }
7634   }
7635   ndbabort();
7636 }
7637 
7638 void
nr_delete_complete(Signal * signal,Nr_op_info * op)7639 Dblqh::nr_delete_complete(Signal* signal, Nr_op_info* op)
7640 {
7641   jamEntry();
7642   Ptr<TcConnectionrec> tcPtr;
7643   tcPtr.i = op->m_ptr_i;
7644   ndbrequire(tcConnect_pool.getValidPtr(tcPtr));
7645 
7646   ndbrequire(tcPtr.p->activeCreat == Fragrecord::AC_NR_COPY);
7647   ndbrequire(tcPtr.p->m_nr_delete.m_cnt);
7648 
7649   tcPtr.p->m_nr_delete.m_cnt--;
7650   if (tcPtr.p->m_nr_delete.m_cnt == 0)
7651   {
7652     jam();
7653     const TcConnectionrecPtr tcConnectptr = tcPtr;
7654     c_fragment_pool.getPtr(fragptr, tcPtr.p->fragmentptr);
7655 
7656     if (tcPtr.p->abortState != TcConnectionrec::ABORT_IDLE)
7657     {
7658       jam();
7659       tcPtr.p->activeCreat = Fragrecord::AC_NORMAL;
7660       abortCommonLab(signal, tcConnectptr);
7661     }
7662     else if (tcPtr.p->operation == ZDELETE &&
7663 	     LqhKeyReq::getNrCopyFlag(tcPtr.p->reqinfo))
7664     {
7665       /**
7666        * This is run directly in handle_nr_copy
7667        */
7668       jam();
7669       packLqhkeyreqLab(signal, tcConnectptr);
7670     }
7671     else
7672     {
7673       jam();
7674       rwConcludedLab(signal, tcConnectptr);
7675     }
7676     return;
7677   }
7678 
7679   if (memcmp(&tcPtr.p->m_nr_delete.m_disk_ref[0],
7680 	     &op->m_disk_ref, sizeof(Local_key)) == 0)
7681   {
7682     jam();
7683     ndbassert(tcPtr.p->m_nr_delete.m_page_id[0] != RNIL);
7684     tcPtr.p->m_nr_delete.m_page_id[0] = tcPtr.p->m_nr_delete.m_page_id[1];
7685     tcPtr.p->m_nr_delete.m_disk_ref[0] = tcPtr.p->m_nr_delete.m_disk_ref[1];
7686   }
7687 }
7688 
7689 Uint32
readPrimaryKeys(Uint32 opPtrI,Uint32 * dst,bool xfrm)7690 Dblqh::readPrimaryKeys(Uint32 opPtrI, Uint32 * dst, bool xfrm)
7691 {
7692   TcConnectionrecPtr regTcPtr;
7693   Uint64 Tmp[MAX_KEY_SIZE_IN_WORDS >> 1];
7694 
7695   jamEntry();
7696   regTcPtr.i = opPtrI;
7697   ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
7698 
7699   Uint32 tableId = regTcPtr.p->tableref;
7700   Uint32 keyLen = regTcPtr.p->primKeyLen;
7701   Uint32 * tmp = xfrm ? (Uint32*)Tmp : dst;
7702 
7703   copy(tmp, regTcPtr.p->keyInfoIVal);
7704 
7705   if (xfrm)
7706   {
7707     jam();
7708     Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX];
7709     return xfrm_key_hash(tableId, (Uint32*)Tmp, dst, ~0, keyPartLen);
7710   }
7711 
7712   return keyLen;
7713 }
7714 
7715 /**
7716  * getKeyInfoWordOrZero
7717  * Get given word of KeyInfo, or zero if it's not available
7718  * Used for tracing
7719  */
7720 Uint32
getKeyInfoWordOrZero(const TcConnectionrec * regTcPtr,Uint32 offset)7721 Dblqh::getKeyInfoWordOrZero(const TcConnectionrec* regTcPtr,
7722                             Uint32 offset)
7723 {
7724   if (regTcPtr->keyInfoIVal != RNIL)
7725   {
7726     SectionReader keyInfoReader(regTcPtr->keyInfoIVal,
7727                                 g_sectionSegmentPool);
7728 
7729     if (keyInfoReader.getSize() > offset)
7730     {
7731       if (offset)
7732         keyInfoReader.step(offset);
7733 
7734       Uint32 word;
7735       keyInfoReader.getWord(&word);
7736       return word;
7737     }
7738   }
7739   return 0;
7740 }
7741 
unlockError(Signal * signal,Uint32 error,const TcConnectionrecPtr tcConnectptr)7742 void Dblqh::unlockError(Signal* signal,
7743                         Uint32 error,
7744                         const TcConnectionrecPtr tcConnectptr)
7745 {
7746   terrorCode = error;
7747   abortErrorLab(signal, tcConnectptr);
7748 }
7749 
7750 /**
7751  * handleUserUnlockRequest
7752  *
7753  * This method handles an LQHKEYREQ unlock request from
7754  * TC.
7755  */
handleUserUnlockRequest(Signal * signal,TcConnectionrecPtr tcConnectptr)7756 void Dblqh::handleUserUnlockRequest(Signal* signal,
7757                                     TcConnectionrecPtr tcConnectptr)
7758 {
7759   jam();
7760   TcConnectionrec * const regTcPtr = tcConnectptr.p;
7761   Uint32 tcPtrI = tcConnectptr.i;
7762 
7763   /* Request to unlock (abort) an existing read operation
7764    *
7765    * 1) Get user's LOCK_REF from KeyInfo
7766    *
7767    * 2) Lookup TC_OP_REF in hash
7768    *
7769    * 3) Check state of found op : TransId, state, type, lock
7770    *
7771    * 4) Check op_id portion
7772    *
7773    * 5) Abort locking op in ACC
7774    *
7775    * 6) Clean up locking op in LQH
7776    *
7777    * 7) Send LQHKEYCONF to TC for user unlock op
7778    *
7779    * 8) Clean up user unlock op
7780    */
7781   if (unlikely( regTcPtr->primKeyLen != LqhKeyReq::UnlockKeyLen ))
7782   {
7783     jam();
7784     unlockError(signal, 4109, tcConnectptr); /* Faulty primary key attribute length */
7785     return;
7786   }
7787 
7788   SectionReader keyInfoReader(regTcPtr->keyInfoIVal,
7789                               getSectionSegmentPool());
7790 
7791   ndbrequire( keyInfoReader.getSize() == regTcPtr->primKeyLen );
7792 
7793   /* Extract components of user lock reference */
7794   Uint32 tcOpRecIndex;
7795   Uint32 lqhOpIdWord;
7796   ndbrequire( keyInfoReader.getWord( &tcOpRecIndex ) ); // Locking op TC index
7797   ndbrequire( keyInfoReader.getWord( &lqhOpIdWord ) );  // Part of Locking op LQH id
7798 
7799   /* Use TC operation record index to find the operation record
7800    * This requires that this operation and the referenced
7801    * operation are part of the same transaction.
7802    * On success this sets tcConnectptr.i and .p to the
7803    * operation-to-unlock's record.
7804    */
7805   if (unlikely( findTransaction(regTcPtr->transid[0],
7806                                 regTcPtr->transid[1],
7807                                 tcOpRecIndex,
7808                                 0,
7809                                 tcConnectptr) != ZOK))
7810   {
7811     jam();
7812     unlockError(signal, ZBAD_OP_REF, tcConnectptr);
7813     return;
7814   }
7815 
7816   TcConnectionrec * const regLockTcPtr = tcConnectptr.p;
7817 
7818   /* Validate that the bottom 32-bits of the operation id reference
7819    * we were given are in alignment
7820    */
7821   Uint32 lockOpKeyReqId = (Uint32) regLockTcPtr->lqhKeyReqId;
7822   if (unlikely( lockOpKeyReqId != lqhOpIdWord ))
7823   {
7824     jam();
7825     unlockError(signal, ZBAD_OP_REF, tcConnectptr);
7826     return;
7827   }
7828 
7829   /* Validate the state of the locking operation */
7830   bool lockingOpValid =
7831     (( regLockTcPtr->operation == ZREAD ) &&
7832        // ZREAD_EX mapped to ZREAD above
7833      ( ! regLockTcPtr->dirtyOp ) &&
7834      ( ! regLockTcPtr->opSimple ) &&
7835      ( (regLockTcPtr->lockType == ZREAD) ||  // LM_Read
7836        (regLockTcPtr->lockType == ZUPDATE) ) // LM_Exclusive
7837      &&
7838      ( regLockTcPtr->transactionState == TcConnectionrec::PREPARED ) &&
7839      ( regLockTcPtr->commitAckMarker == RNIL ) &&
7840        // No commit ack marker
7841      ( regLockTcPtr->logWriteState ==
7842        TcConnectionrec::NOT_STARTED )); // No log written
7843 
7844   if (unlikely(! lockingOpValid))
7845   {
7846     jam();
7847     unlockError(signal, ZBAD_UNLOCK_STATE, tcConnectptr);
7848     return;
7849   }
7850 
7851   /* Ok, now we're ready to start 'aborting' this operation, to get the
7852    * effect of unlocking it
7853    */
7854   c_acc->execACC_ABORTREQ(signal,
7855                           regLockTcPtr->accConnectrec,
7856                           regLockTcPtr->accConnectPtrP,
7857                           0);
7858   jamEntry();
7859 
7860   /* Would be nice to handle non-success case somehow */
7861   ndbrequire(signal->theData[1] == 0);
7862 
7863   /* Now we want to release LQH resources associated with the
7864    * locking operation
7865    */
7866   cleanUp(signal, tcConnectptr);
7867 
7868   /* Now that the locking operation has been 'disappeared', we need to
7869    * send an LQHKEYCONF for the unlock operation and then 'disappear' it
7870    * as well
7871    */
7872   tcConnectptr.i = tcPtrI;
7873   ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
7874 
7875   ndbrequire( regTcPtr == tcConnectptr.p );
7876 
7877   /* Set readlenAi to the unlocked operation's TC operation ref */
7878   regTcPtr->readlenAi = tcOpRecIndex;
7879 
7880   /* Clear number of fired triggers */
7881   regTcPtr->numFiredTriggers = 0;
7882 
7883   /* Now send the LQHKEYCONF to TC */
7884   sendLqhkeyconfTc(signal, regTcPtr->tcBlockref, tcConnectptr);
7885 
7886   /* Finally, clean up the unlock operation itself */
7887   cleanUp(signal, tcConnectptr);
7888 
7889   return;
7890 }
7891 
7892 /**
7893  * TUPle deallocation
7894  *
7895  * ACC informs LQH via TUP_DEALLOCREQ when a TUPle (ROWID)
7896  * is no longer needed by active ACC operations (Key ops,
7897  * scans with locks etc)
7898  * LQH then informs TUP via TUP_DEALLOCREQ when TUP should
7899  * release the storage, making it available for some other
7900  * insert to the fragment.
7901  *
7902  * It is important that ROWIDs are released on Backup
7903  * replicas before the Primary replica.
7904  *
7905  * To ensure this:
7906  *   ACC :
7907  *   - Informs LQH of each of the operations involved in releasing
7908  *     a ROWID at commit time
7909  *   - Informs LQH when the last operation involved in releasing
7910  *     a ROWID has committed
7911  *
7912  *   LQH :
7913  *   - Tracks these details as a reference count on one of the
7914  *     operations involved in releasing the ROWID (the dealloc op).
7915  *   - Marks the other operations to point to the dealloc op
7916  *   - Decrements the reference count when :
7917  *       - ACC informs LQH that the last involved operation
7918  *         has committed
7919  *       - Referring operations complete locally
7920  *       - The dealloc op completes locally
7921  *   - When the reference count hits zero, TUP is told to release
7922  *     the row storage
7923  *
7924  * Using a reference count avoids problems with premature release when
7925  * the order of completion of the involved operations varies.
7926  *
7927  * Since the dealloc operation can complete before the ref count hits
7928  * zero, it supports a 'zombie' state where it is not yet deallocated
7929  * as it is hosting a count
7930  */
7931 
7932 /**
7933  * incrDeallocRefCount
7934  *
7935  * Called when ACC notifies LQH of operations involved in TUPle
7936  * deallocation
7937  */
incrDeallocRefCount(Signal * signal,Uint32 opPtrI,Uint32 countOpPtrI)7938 void Dblqh::incrDeallocRefCount(Signal* signal,
7939                                 Uint32 opPtrI,
7940                                 Uint32 countOpPtrI)
7941 {
7942   jam();
7943   ndbrequire(opPtrI != RNIL);
7944   ndbrequire(countOpPtrI != RNIL);
7945 
7946   TcConnectionrecPtr opPtr;
7947   opPtr.i = opPtrI;
7948   ndbrequire(tcConnect_pool.getValidPtr(opPtr));
7949 
7950   TcConnectionrecPtr countOpPtr;
7951   countOpPtr.i = countOpPtrI;
7952   ndbrequire(tcConnect_pool.getValidPtr(countOpPtr));
7953 
7954   const bool referring_op = (opPtrI != countOpPtrI);
7955 
7956   if (referring_op)
7957   {
7958     jam();
7959     ndbrequire(opPtr.p->m_dealloc_state == TcConnectionrec::DA_IDLE);
7960     ndbrequire(opPtr.p->m_dealloc_data.m_dealloc_ref_count == RNIL);
7961     opPtr.p->m_dealloc_state = TcConnectionrec::DA_DEALLOC_REFERENCE;
7962     opPtr.p->m_dealloc_data.m_dealloc_op_id = countOpPtr.i;
7963   }
7964 
7965   if (countOpPtr.p->m_dealloc_state == TcConnectionrec::DA_IDLE)
7966   {
7967     jam();
7968     ndbrequire(countOpPtr.p->m_dealloc_data.m_dealloc_op_id == RNIL);
7969 
7970     // init count to 1 so that final refcount = op count + 1
7971     // this ensures that dealloc cannot happen until ACC sends
7972     // additional signal to decrement refcount to 0.
7973     countOpPtr.p->m_dealloc_state = TcConnectionrec::DA_DEALLOC_COUNT;
7974     countOpPtr.p->m_dealloc_data.m_dealloc_ref_count = 1;
7975   }
7976 
7977   /* Increment count */
7978   ndbrequire(countOpPtr.p->m_dealloc_state == TcConnectionrec::DA_DEALLOC_COUNT ||
7979              countOpPtr.p->m_dealloc_state == TcConnectionrec::DA_DEALLOC_COUNT_ZOMBIE);
7980   ndbrequire(countOpPtr.p->m_dealloc_data.m_dealloc_ref_count != RNIL);
7981 
7982   countOpPtr.p->m_dealloc_data.m_dealloc_ref_count++;
7983 }
7984 
7985 /**
7986  * decrDeallocRefCount
7987  *
7988  * Called when ACC triggers deallocation, and when involved
7989  * operations complete.
7990  *
7991  * Returns the new count of references on the rowID
7992  */
decrDeallocRefCount(Signal * signal,Uint32 opPtrI)7993 Uint32 Dblqh::decrDeallocRefCount(Signal* signal,
7994                                   Uint32 opPtrI)
7995 {
7996   jam();
7997   ndbrequire(opPtrI != RNIL);
7998 
7999   TcConnectionrecPtr opPtr;
8000   opPtr.i = opPtrI;
8001   ndbrequire(tcConnect_pool.getValidPtr(opPtr));
8002 
8003   TcConnectionrecPtr countOpPtr = opPtr;
8004 
8005   if (opPtr.p->m_dealloc_state == TcConnectionrec::DA_DEALLOC_REFERENCE)
8006   {
8007     jam();
8008     ndbrequire(opPtr.p->m_dealloc_data.m_dealloc_op_id != RNIL);
8009     countOpPtr.i = opPtr.p->m_dealloc_data.m_dealloc_op_id;
8010     ndbrequire(tcConnect_pool.getValidPtr(countOpPtr));
8011   }
8012 
8013   ndbrequire(countOpPtr.p->m_dealloc_state == TcConnectionrec::DA_DEALLOC_COUNT ||
8014              countOpPtr.p->m_dealloc_state == TcConnectionrec::DA_DEALLOC_COUNT_ZOMBIE);
8015   ndbrequire(countOpPtr.p->m_dealloc_data.m_dealloc_ref_count != RNIL);
8016   ndbrequire(countOpPtr.p->m_dealloc_data.m_dealloc_ref_count > 0);
8017 
8018   const Uint32 newCount = --countOpPtr.p->m_dealloc_data.m_dealloc_ref_count;
8019 
8020   if (newCount == 0)
8021   {
8022     jam();
8023     /* Dealloc TUPle now */
8024     signal->theData[0] = countOpPtr.p->fragmentid;
8025     signal->theData[1] = countOpPtr.p->tableref;
8026     signal->theData[2] = countOpPtr.p->m_row_id.m_page_no;
8027     signal->theData[3] = countOpPtr.p->m_row_id.m_page_idx;
8028     signal->theData[4] = RNIL;
8029 
8030     EXECUTE_DIRECT(DBTUP, GSN_TUP_DEALLOCREQ, signal, 5);
8031 
8032     bool countOpIsZombie =
8033       (countOpPtr.p->m_dealloc_state == TcConnectionrec::DA_DEALLOC_COUNT_ZOMBIE);
8034 
8035     countOpPtr.p->m_dealloc_state = TcConnectionrec::DA_IDLE;
8036     countOpPtr.p->m_dealloc_data.m_dealloc_ref_count = RNIL;
8037 
8038     if (countOpIsZombie)
8039     {
8040       jam();
8041       /**
8042        * Op was not released during COMPLETE because it is a zombie dealloc op
8043        * Release it now.
8044        */
8045       releaseTcrec(signal, countOpPtr);
8046     }
8047   }
8048 
8049   return newCount;
8050 }
8051 
8052 /**
8053  * handleDeallocOp
8054  *
8055  * Called when LQH is releasing an operation record which has
8056  * a non idle m_dealloc_state state
8057  *
8058  * The relevant counting operation is found, the deallocation
8059  * ref count is decremented, the tuple is freed if the count
8060  * hits zero, and the counting op is freed if it is a zombie.
8061  *
8062  * If the counting op is released before the count is zero
8063  * it becomes a zombie.
8064  */
handleDeallocOp(Signal * signal,TcConnectionrecPtr regTcPtr)8065 void Dblqh::handleDeallocOp(Signal* signal,
8066                             TcConnectionrecPtr regTcPtr)
8067 {
8068   jam();
8069 
8070   ndbrequire(regTcPtr.p->m_dealloc_state != TcConnectionrec::DA_IDLE);
8071   const bool referringOpReleased =
8072     (regTcPtr.p->m_dealloc_state == TcConnectionrec::DA_DEALLOC_REFERENCE);
8073 
8074   const Uint32 newCount = decrDeallocRefCount(signal,
8075                                               regTcPtr.i);
8076 
8077   if (referringOpReleased)
8078   {
8079     jam();
8080     regTcPtr.p->m_dealloc_state = TcConnectionrec::DA_IDLE;
8081     regTcPtr.p->m_dealloc_data.m_dealloc_op_id = RNIL;
8082   }
8083   else
8084   {
8085     jam();
8086     // The dealloc op hosts the refcount, so it cannot be released until
8087     // the refcount reaches zero. Mark the dealloc op as a zombie op to
8088     // prevent its release.
8089     if (newCount != 0)
8090     {
8091       jam();
8092       regTcPtr.p->m_dealloc_state = TcConnectionrec::DA_DEALLOC_COUNT_ZOMBIE;
8093     }
8094   }
8095 }
8096 
8097 /**
8098  * execTUP_DEALLOCREQ
8099  *
8100  * Receive notification from ACC that a TUPle is no longer needed
8101  * ACC informs LQH of each operation involved in deallocation
8102  * so that LQH can determine when it is safe to ask TUP to release
8103  * the storage.
8104  *
8105  * 2 cases :
8106  *   i)  theData[5] != RNIL : Notification(s) of pending deallocation
8107  *   ii) theData[5] == RNIL : Deallocation triggered
8108  *
8109  * For commit, there can be 1 or more invocations of i) followed
8110  * by one invocation of ii)
8111  *
8112  * For abort, there will be 1 invocation of ii)
8113  *
8114  * From LQH's point of view :
8115  * 1) ACC informs LQH of a set of operations involved in deallocating a tuple
8116  *   - LQH should not release the tuple before *all* of those operations are
8117  *     complete
8118  *   - For each tuple + set of operations involved in deallocating it, ACC
8119  *     informs LQH of a single designated 'counting op' which is a member of
8120  *     the set and which might help LQH track the state of the set + the tuple
8121  * 2) ACC requires that LQH does not deallocate the tuple before
8122  *    ACC gives permission to do so.
8123  * 3) Special case (aborts) : Just a single trigger notification - LQH should
8124  *    deallocate on operation completion
8125  *
8126  * LQH : dealloc_iff (All notified ops complete && Trigger from ACC received)
8127  */
execTUP_DEALLOCREQ(Signal * signal)8128 void Dblqh::execTUP_DEALLOCREQ(Signal* signal)
8129 {
8130   TcConnectionrecPtr regTcPtr;
8131 
8132   jamEntry();
8133   regTcPtr.i = signal->theData[4];
8134   ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
8135 
8136   if (TRACENR_FLAG)
8137   {
8138     Local_key tmp;
8139     tmp.m_page_no = signal->theData[2];
8140     tmp.m_page_idx = signal->theData[3];
8141     TRACENR("TUP_DEALLOC: " << tmp <<
8142       (signal->theData[5] == RNIL ? " TRIGGER" : " NOTIFICATION") << endl);
8143   }
8144 
8145   regTcPtr.p->m_row_id.m_page_no = signal->theData[2];
8146   regTcPtr.p->m_row_id.m_page_idx = signal->theData[3];
8147 
8148   if (signal->theData[5] != RNIL)
8149   {
8150     jam();
8151     /**
8152      * Notification of Op involved in deallocation
8153      */
8154     incrDeallocRefCount(signal,
8155                         signal->theData[4],
8156                         signal->theData[5]);
8157   }
8158   else
8159   {
8160     jam();
8161     /**
8162      * Deallocation triggered by ACC
8163      * Now LQH decides what to do next.
8164      */
8165     if (regTcPtr.p->m_dealloc_state == TcConnectionrec::DA_IDLE)
8166     {
8167       jam();
8168       /**
8169        * ABORT case
8170        * Set count to 1 for quick dealloc on complete of this op
8171        */
8172       ndbrequire(regTcPtr.p->m_dealloc_data.m_dealloc_ref_count == RNIL);
8173       regTcPtr.p->m_dealloc_state = TcConnectionrec::DA_DEALLOC_COUNT;
8174       /* Init count to 1 as we are processing report_dealloc*/
8175       regTcPtr.p->m_dealloc_data.m_dealloc_ref_count = 1;
8176     }
8177     else
8178     {
8179       jam();
8180       /**
8181        * [Multi-op] COMMIT case
8182        * Decrement count to indicate 'permission to dealloc'
8183        * from ACC
8184        */
8185       decrDeallocRefCount(signal,
8186                           signal->theData[4]);
8187     }
8188   }
8189 }//Dblqh::execTUP_DEALLOCREQ()
8190 
8191 /* ************>> */
8192 /*  ACCKEYCONF  > */
8193 /* ************>> */
execACCKEYCONF(Signal * signal)8194 void Dblqh::execACCKEYCONF(Signal* signal)
8195 {
8196   jamEntry();
8197   if (ERROR_INSERTED(5095))
8198   {
8199     jam();
8200     g_eventLogger->info("LQH %u : ERRINS 5095 Delaying ACCKEYCONF",
8201                         instance());
8202     sendSignalWithDelay(reference(), GSN_ACCKEYCONF, signal, 10, 5);
8203     return;
8204   }
8205 
8206   if (ERROR_INSERTED(5094))
8207   {
8208     jam();
8209     g_eventLogger->info("LQH %u : ERRINS 5094 Passing ACCKEYCONF 1 and setting ERRINS 5095",
8210                         instance());
8211     SET_ERROR_INSERT_VALUE(5095);
8212   }
8213 
8214   if (ERROR_INSERTED(5096))
8215   {
8216     jam();
8217     g_eventLogger->info("LQH %u : ERRINS 5096 set when processing ACCKEYCONF, clearing",
8218                         instance());
8219     CLEAR_ERROR_INSERT_VALUE;
8220     //SET_ERROR_INSERT_VALUE(5097); // Kill on scan
8221   }
8222 
8223   Uint32 tcIndex = signal->theData[0];
8224 
8225   setup_key_pointers(tcIndex);
8226   Uint32 localKey1 = signal->theData[3];
8227   Uint32 localKey2 = signal->theData[4];
8228   TcConnectionrec * const regTcPtr = m_tc_connect_ptr.p;
8229 
8230   if (regTcPtr->transactionState != TcConnectionrec::WAIT_ACC)
8231   {
8232     jam();
8233     LQHKEY_abort(signal, 3, m_tc_connect_ptr);
8234     return;
8235   }//if
8236   if (unlikely(c_acc->checkOpPendingAbort(regTcPtr->accConnectrec)))
8237   {
8238     jam();
8239     /* Wait for Abort */
8240     return;
8241   }
8242   c_tup->prepareTUPKEYREQ(localKey1, localKey2, fragptr.p->tupFragptr);
8243   continueACCKEYCONF(signal, localKey1, localKey2, m_tc_connect_ptr);
8244 }
8245 
8246 void
continueACCKEYCONF(Signal * signal,Uint32 localKey1,Uint32 localKey2,const TcConnectionrecPtr tcConnectptr)8247 Dblqh::continueACCKEYCONF(Signal * signal,
8248                           Uint32 localKey1,
8249                           Uint32 localKey2,
8250                           const TcConnectionrecPtr tcConnectptr)
8251 {
8252   TcConnectionrec * const regTcPtr = tcConnectptr.p;
8253   /* ------------------------------------------------------------------------
8254    * IT IS NOW TIME TO CONTACT THE TUPLE MANAGER. THE TUPLE MANAGER NEEDS THE
8255    * INFORMATION ON WHICH TABLE AND FRAGMENT, THE LOCAL KEY AND IT NEEDS TO
8256    * KNOW THE TYPE OF OPERATION TO PERFORM. TUP CAN SEND THE ATTRINFO DATA
8257    * EITHER TO THE TC BLOCK OR DIRECTLY TO THE APPLICATION. THE SCHEMA VERSION
8258    * IS NEEDED SINCE TWO SCHEMA VERSIONS CAN BE ACTIVE SIMULTANEOUSLY ON A
8259    * TABLE.
8260    * ----------------------------------------------------------------------- */
8261   if (regTcPtr->operation == ZWRITE)
8262   {
8263     ndbassert(regTcPtr->seqNoReplica == 0 ||
8264 	      regTcPtr->activeCreat == Fragrecord::AC_NR_COPY);
8265     Uint32 op= signal->theData[1];
8266     Uint32 requestInfo = regTcPtr->reqinfo;
8267     if(likely(op == ZINSERT || op == ZUPDATE))
8268     {
8269       jam();
8270       regTcPtr->operation = op;
8271     }
8272     else
8273     {
8274       jam();
8275       warningEvent("Converting %d to ZUPDATE", op);
8276       op = regTcPtr->operation = ZUPDATE;
8277     }
8278     if (regTcPtr->seqNoReplica == 0)
8279     {
8280       jam();
8281       requestInfo &= ~(LqhKeyReq::RI_OPERATION_MASK << LqhKeyReq::RI_OPERATION_SHIFT);
8282       LqhKeyReq::setOperation(requestInfo, op);
8283       regTcPtr->reqinfo = requestInfo;
8284     }
8285   }//if
8286 
8287   /* ------------------------------------------------------------------------
8288    * IT IS NOW TIME TO CONTACT THE TUPLE MANAGER. THE TUPLE MANAGER NEEDS THE
8289    * INFORMATION ON WHICH TABLE AND FRAGMENT, THE LOCAL KEY AND IT NEEDS TO
8290    * KNOW THE TYPE OF OPERATION TO PERFORM. TUP CAN SEND THE ATTRINFO DATA
8291    * EITHER TO THE TC BLOCK OR DIRECTLY TO THE APPLICATION. THE SCHEMA VERSION
8292    * IS NEEDED SINCE TWO SCHEMA VERSIONS CAN BE ACTIVE SIMULTANEOUSLY ON A
8293    * TABLE.
8294    * ----------------------------------------------------------------------- */
8295   Fragrecord * regFragptr = fragptr.p;
8296   if (!regTcPtr->m_disk_table)
8297   {
8298     jamDebug();
8299     acckeyconf_tupkeyreq(signal,
8300                          regTcPtr,
8301                          regFragptr,
8302                          localKey1,
8303                          localKey2,
8304                          RNIL);
8305   }
8306   else
8307   {
8308     jamDebug();
8309     acckeyconf_load_diskpage(signal,
8310                              tcConnectptr,
8311                              regFragptr,
8312                              localKey1,
8313                              localKey2);
8314   }
8315 }
8316 
8317 void
acckeyconf_tupkeyreq(Signal * signal,TcConnectionrec * regTcPtr,Fragrecord * regFragptrP,Uint32 lkey1,Uint32 lkey2,Uint32 disk_page)8318 Dblqh::acckeyconf_tupkeyreq(Signal* signal, TcConnectionrec* regTcPtr,
8319 			    Fragrecord* regFragptrP,
8320 			    Uint32 lkey1, Uint32 lkey2,
8321 			    Uint32 disk_page)
8322 {
8323   Uint32 op = regTcPtr->operation;
8324   regTcPtr->transactionState = TcConnectionrec::WAIT_TUP;
8325   /* ------------------------------------------------------------------------
8326    * IT IS NOW TIME TO CONTACT THE TUPLE MANAGER. THE TUPLE MANAGER NEEDS THE
8327    * INFORMATION ON WHICH TABLE AND FRAGMENT, THE LOCAL KEY AND IT NEEDS TO
8328    * KNOW THE TYPE OF OPERATION TO PERFORM. TUP CAN SEND THE ATTRINFO DATA
8329    * EITHER TO THE TC BLOCK OR DIRECTLY TO THE APPLICATION. THE SCHEMA VERSION
8330    * IS NEEDED SINCE TWO SCHEMA VERSIONS CAN BE ACTIVE SIMULTANEOUSLY ON A
8331    * TABLE.
8332    * ----------------------------------------------------------------------- */
8333   Uint32 page_idx = lkey2;
8334   Uint32 page_no = lkey1;
8335   Uint32 Ttupreq = 0;
8336   Uint32 flags = regTcPtr->m_flags;
8337   TupKeyReq::setDirtyFlag(Ttupreq, regTcPtr->dirtyOp);
8338   TupKeyReq::setSimpleFlag(Ttupreq, regTcPtr->opSimple);
8339   TupKeyReq::setOperation(Ttupreq, op);
8340   TupKeyReq::setInterpretedFlag(Ttupreq, regTcPtr->opExec);
8341   TupKeyReq::setRowidFlag(Ttupreq, regTcPtr->m_use_rowid);
8342   TupKeyReq::setReorgFlag(Ttupreq, regTcPtr->m_reorg);
8343   TupKeyReq::setNrCopyFlag(Ttupreq,
8344                            (LqhKeyReq::getNrCopyFlag(regTcPtr->reqinfo) |
8345                            c_executing_redo_log));
8346 #ifdef ERROR_INSERT
8347   /* Ensure c_executing_redo_log isn't set when a read happens */
8348   ndbrequire(op != ZREAD || c_executing_redo_log == 0);
8349 #endif
8350 
8351   /* ---------------------------------------------------------------------
8352    * Clear interpreted mode bit since we do not want the next replica to
8353    * use interpreted mode. The next replica will receive a normal write.
8354    * --------------------------------------------------------------------- */
8355   regTcPtr->opExec = 0;
8356   /* ************< */
8357   /*  TUPKEYREQ  < */
8358   /* ************< */
8359   Uint32 sig0, sig1, sig2, sig3, sig4;
8360 
8361   TupKeyReq * const tupKeyReq = (TupKeyReq *)signal->getDataPtrSend();
8362   tupKeyReq->request = Ttupreq;
8363   tupKeyReq->keyRef1 = page_no;
8364   tupKeyReq->keyRef2 = page_idx;
8365   tupKeyReq->disk_page= disk_page;
8366 
8367 
8368   sig0 = regTcPtr->transid[0];
8369   sig1 = regTcPtr->transid[1];
8370   sig2 = regTcPtr->m_row_id.m_page_no;
8371   sig3 = regTcPtr->m_row_id.m_page_idx;
8372   sig4 = regTcPtr->applOprec;
8373 
8374   tupKeyReq->transId1 = sig0;
8375   tupKeyReq->transId2 = sig1;
8376   tupKeyReq->m_row_id_page_no = sig2;
8377   tupKeyReq->m_row_id_page_idx = sig3;
8378   tupKeyReq->opRef = sig4;
8379 
8380   sig0 = regTcPtr->tcBlockref;
8381   sig1 = regTcPtr->tcOprec;
8382   sig2 = regTcPtr->savePointId;
8383   sig3 = regTcPtr->applRef;
8384   sig4 = regTcPtr->totReclenAi;
8385 
8386   tupKeyReq->coordinatorTC = sig0;
8387   tupKeyReq->tcOpIndex = sig1;
8388   tupKeyReq->savePointId = sig2;
8389   tupKeyReq->applRef = sig3;
8390   tupKeyReq->attrBufLen = sig4;
8391 
8392   tupKeyReq->triggers =
8393     (regTcPtr->m_flags & TcConnectionrec::OP_NO_TRIGGERS) ?
8394     TupKeyReq::OP_NO_TRIGGERS :
8395     (regTcPtr->seqNoReplica == 0) ?
8396     TupKeyReq::OP_PRIMARY_REPLICA : TupKeyReq::OP_BACKUP_REPLICA;
8397 
8398   TRACE_OP(regTcPtr, "TUPKEYREQ");
8399 
8400   regTcPtr->m_use_rowid |= (op == ZINSERT || op == ZREFRESH);
8401   regTcPtr->m_row_id.m_page_no = page_no;
8402   regTcPtr->m_row_id.m_page_idx = page_idx;
8403 
8404   tupKeyReq->deferred_constraints =
8405     (flags & TcConnectionrec::OP_DEFERRED_CONSTRAINTS) != 0;
8406   tupKeyReq->disable_fk_checks =
8407     (flags & TcConnectionrec::OP_DISABLE_FK) != 0;
8408 
8409 
8410   /* Pass AttrInfo section if available in the TupKeyReq signal
8411    * We are still responsible for releasing it, TUP is just
8412    * borrowing it
8413    */
8414   if (tupKeyReq->attrBufLen > 0)
8415   {
8416     ndbassert( regTcPtr->attrInfoIVal != RNIL );
8417     c_tup->copyAttrinfo(regTcPtr->totReclenAi,
8418                         regTcPtr->attrInfoIVal);
8419   }
8420 #ifdef VM_TRACE
8421   tupKeyReq->fragPtr = regFragptrP->tupFragptr;
8422 #endif
8423   if (c_tup->execTUPKEYREQ(signal))
8424   {
8425     execTUPKEYCONF(signal);
8426     return;
8427   }
8428   else
8429   {
8430     execTUPKEYREF(signal);
8431     return;
8432   }
8433 }
8434 
8435 void
acckeyconf_load_diskpage(Signal * signal,TcConnectionrecPtr regTcPtr,Fragrecord * regFragptrP,Uint32 lkey1,Uint32 lkey2)8436 Dblqh::acckeyconf_load_diskpage(Signal* signal, TcConnectionrecPtr regTcPtr,
8437 				Fragrecord* regFragptrP,
8438                                 Uint32 lkey1, Uint32 lkey2)
8439 {
8440   int res;
8441   Uint32 disk_flag = regTcPtr.p->operation;
8442   disk_flag += (LqhKeyReq::getNrCopyFlag(regTcPtr.p->reqinfo) |
8443                 c_executing_redo_log) ? Page_cache_client::COPY_FRAG : 0;
8444   if((res= c_tup->load_diskpage(signal,
8445 				regTcPtr.p->tupConnectrec,
8446 				regFragptrP->tupFragptr,
8447 				lkey1, lkey2,
8448 				disk_flag)) > 0)
8449   {
8450     jamDebug();
8451     acckeyconf_tupkeyreq(signal, regTcPtr.p, regFragptrP, lkey1, lkey2, res);
8452   }
8453   else if(res == 0)
8454   {
8455     jamDebug();
8456     regTcPtr.p->transactionState = TcConnectionrec::WAIT_TUP;
8457     DEB_COPY(("(%u)get_page returned 0 for %u",
8458               instance(),
8459               regTcPtr.i));
8460     regTcPtr.p->m_row_id.m_page_no = lkey1;
8461     regTcPtr.p->m_row_id.m_page_idx = lkey2;
8462   }
8463   else
8464   {
8465     jamDebug();
8466     regTcPtr.p->transactionState = TcConnectionrec::WAIT_TUP;
8467     TupKeyRef * ref = (TupKeyRef *)signal->getDataPtr();
8468     ref->userRef= regTcPtr.i;
8469     if (res == -1)
8470     {
8471       jam();
8472       DEB_COPY(("(%u)get_page returned with -1", instance()));
8473       ref->errorCode= ~0;
8474     }
8475     else
8476     {
8477       jam();
8478       ref->errorCode = -res;
8479       DEB_COPY(("(%u)get_page returned with %d", instance(), -res));
8480     }
8481     execTUPKEYREF(signal);
8482     return;
8483   }
8484 }
8485 
8486 void
acckeyconf_load_diskpage_callback(Signal * signal,Uint32 callbackData,Uint32 disk_page)8487 Dblqh::acckeyconf_load_diskpage_callback(Signal* signal,
8488 					 Uint32 callbackData,
8489 					 Uint32 disk_page)
8490 {
8491   jamEntry();
8492   setup_key_pointers(callbackData);
8493   TcConnectionrecPtr tcConnectptr = m_tc_connect_ptr;
8494   FragrecordPtr fragPtr = fragptr;
8495   TcConnectionrec * const regTcPtr = tcConnectptr.p;
8496   TcConnectionrec::TransactionState state = regTcPtr->transactionState;
8497   if (likely(disk_page > 0 && state == TcConnectionrec::WAIT_TUP))
8498   {
8499 
8500     /**
8501      * We have returned from a real-time break, we need to set
8502      * up the proper pointers for a key execution.
8503      */
8504     jam();
8505     c_tup->prepareTUPKEYREQ(regTcPtr->m_row_id.m_page_no,
8506                             regTcPtr->m_row_id.m_page_idx,
8507                             fragPtr.p->tupFragptr);
8508     acckeyconf_tupkeyreq(signal, regTcPtr, fragPtr.p,
8509 			 regTcPtr->m_row_id.m_page_no,
8510 			 regTcPtr->m_row_id.m_page_idx,
8511 			 disk_page);
8512     return;
8513   }
8514   else if (state != TcConnectionrec::WAIT_TUP)
8515   {
8516     ndbrequire(state == TcConnectionrec::WAIT_TUP_TO_ABORT);
8517     TupKeyRef * ref = (TupKeyRef *)signal->getDataPtr();
8518     ref->userRef= callbackData;
8519     ref->errorCode= disk_page;
8520     execTUPKEYREF(signal);
8521   }
8522   else
8523   {
8524     TupKeyRef * ref = (TupKeyRef *)signal->getDataPtr();
8525     ref->userRef= callbackData;
8526     ref->errorCode= disk_page;
8527     execTUPKEYREF(signal);
8528     return;
8529   }
8530 }
8531 
8532 /* --------------------------------------------------------------------------
8533  * -------                       ENTER TUP...                         -------
8534  * ENTER TUPKEYCONF WITH
8535  *           TC_CONNECTPTR,
8536  *           TDATA2,     LOCAL KEY REFERENCE 1, ONLY INTERESTING AFTER INSERT
8537  *           TDATA3,     LOCAL KEY REFERENCE 1, ONLY INTERESTING AFTER INSERT
8538  *           TDATA4,     TOTAL LENGTH OF READ DATA SENT TO TC/APPLICATION
8539  *           TDATA5      TOTAL LENGTH OF UPDATE DATA SENT TO/FROM TUP
8540  *        GOTO TUPKEY_CONF
8541  *
8542  *  TAKE CARE OF RESPONSES FROM TUPLE MANAGER.
8543  * -------------------------------------------------------------------------- */
tupkeyConfLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)8544 void Dblqh::tupkeyConfLab(Signal* signal,
8545                           const TcConnectionrecPtr tcConnectptr)
8546 {
8547   TcConnectionrec * const regTcPtr = tcConnectptr.p;
8548 
8549 /* ---- GET OPERATION TYPE AND CHECK WHAT KIND OF OPERATION IS REQUESTED --- */
8550   const TupKeyConf * const tupKeyConf = (TupKeyConf *)&signal->theData[0];
8551   Uint32 activeCreat = regTcPtr->activeCreat;
8552   Uint32 readLen = tupKeyConf->readLength;
8553   Uint32 writeLen = tupKeyConf->writeLength;
8554 
8555   TRACE_OP(regTcPtr, "TUPKEYCONF");
8556 
8557   c_acc->execACCKEY_ORD(signal,
8558                         regTcPtr->accConnectrec,
8559                         regTcPtr->accConnectPtrP);
8560 
8561   jamEntryDebug();
8562   if (readLen != 0)
8563   {
8564     jamDebug();
8565 
8566     /* SET BIT 15 IN REQINFO */
8567     LqhKeyReq::setApplicationAddressFlag(regTcPtr->reqinfo, 1);
8568     regTcPtr->readlenAi = readLen;
8569   }//if
8570 
8571   if (regTcPtr->operation == ZREAD &&
8572       (regTcPtr->opSimple || regTcPtr->dirtyOp))
8573   {
8574     jamDebug();
8575     /* ----------------------------------------------------------------------
8576      * THE OPERATION IS A SIMPLE READ.
8577      * WE WILL IMMEDIATELY COMMIT THE OPERATION.
8578      * SINCE WE HAVE NOT RELEASED THE FRAGMENT LOCK
8579      * (FOR LOCAL CHECKPOINTS) YET
8580      * WE CAN GO IMMEDIATELY TO COMMIT_CONTINUE_AFTER_BLOCKED.
8581      * WE HAVE ALREADY SENT THE RESPONSE SO WE ARE NOT INTERESTED IN
8582      * READ LENGTH
8583      * --------------------------------------------------------------------- */
8584     commitContinueAfterBlockedLab(signal, tcConnectptr);
8585     return;
8586   }//if
8587 
8588   regTcPtr->totSendlenAi = writeLen;
8589   /* We will propagate / log writeLen words
8590    * Check that that is how many we have available to
8591    * propagate
8592    */
8593   ndbrequire(regTcPtr->totSendlenAi == regTcPtr->currTupAiLen);
8594 
8595   if (unlikely(activeCreat == Fragrecord::AC_NR_COPY))
8596   {
8597     jam();
8598     ndbrequire(regTcPtr->m_nr_delete.m_cnt);
8599     regTcPtr->m_nr_delete.m_cnt--;
8600     if (regTcPtr->m_nr_delete.m_cnt)
8601     {
8602       jam();
8603       /**
8604        * Let operation wait for pending NR operations
8605        *   even for before writing log...(as it's simpler)
8606        */
8607 
8608 #ifdef VM_TRACE
8609       /**
8610        * Only disk table can have pending ops...
8611        */
8612       TablerecPtr tablePtr;
8613       tablePtr.i = regTcPtr->tableref;
8614       ptrCheckGuard(tablePtr, ctabrecFileSize, tablerec);
8615       ndbrequire(tablePtr.p->m_disk_table);
8616 #endif
8617 
8618       return;
8619     }
8620   }
8621 
8622   rwConcludedLab(signal, tcConnectptr);
8623   return;
8624 }//Dblqh::tupkeyConfLab()
8625 
sendBatchedLqhkeyreq(Signal * signal,Uint32 lqhRef,Uint32 siglen,SectionHandle * handle)8626 void Dblqh::sendBatchedLqhkeyreq(Signal* signal, Uint32 lqhRef, Uint32 siglen, SectionHandle* handle)
8627 {
8628   jam();
8629   const Uint32 version = getNodeInfo(refToNode(lqhRef)).m_version;
8630   if (ndbd_frag_lqhkeyreq(version))
8631   {
8632     jam();
8633     sendBatchedFragmentedSignal(lqhRef,
8634                                 GSN_LQHKEYREQ,
8635                                 signal,
8636                                 siglen,
8637                                 JBB,
8638                                 handle,
8639                                 false);
8640   }
8641   else
8642   {
8643     jam();
8644     sendSignal(lqhRef,
8645                GSN_LQHKEYREQ,
8646                signal,
8647                siglen,
8648                JBB,
8649                handle);
8650   }
8651 }
8652 
8653 /* --------------------------------------------------------------------------
8654  *     THE CODE IS FOUND IN THE SIGNAL RECEPTION PART OF LQH
8655  * -------------------------------------------------------------------------- */
rwConcludedLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)8656 void Dblqh::rwConcludedLab(Signal* signal,
8657                            const TcConnectionrecPtr tcConnectptr)
8658 {
8659   TcConnectionrec * const regTcPtr = tcConnectptr.p;
8660   /* ------------------------------------------------------------------------
8661    *  WE HAVE NOW CONCLUDED READING/WRITING IN ACC AND TUP FOR THIS OPERATION.
8662    *  IT IS NOW TIME TO LOG THE OPERATION, SEND REQUEST TO NEXT NODE OR TC AND
8663    *  FOR SOME TYPES OF OPERATIONS IT IS EVEN TIME TO COMMIT THE OPERATION.
8664    * ------------------------------------------------------------------------ */
8665   if (regTcPtr->operation == ZREAD) {
8666     jam();
8667     /* ----------------------------------------------------------------------
8668      * A NORMAL READ OPERATION IS NOT LOGGED BUT IS NOT COMMITTED UNTIL THE
8669      * COMMIT SIGNAL ARRIVES. THUS WE CONTINUE PACKING THE RESPONSE.
8670      * ---------------------------------------------------------------------- */
8671     packLqhkeyreqLab(signal, tcConnectptr);
8672     return;
8673   } else {
8674     FragrecordPtr regFragptr = fragptr;
8675     if (regFragptr.p->logFlag == Fragrecord::STATE_FALSE){
8676       if (regTcPtr->dirtyOp == ZTRUE) {
8677         jam();
8678 	/* ------------------------------------------------------------------
8679 	 * THIS OPERATION WAS A WRITE OPERATION THAT DO NOT NEED LOGGING AND
8680 	 * THAT CAN CAN  BE COMMITTED IMMEDIATELY.
8681 	 * ----------------------------------------------------------------- */
8682         commitContinueAfterBlockedLab(signal, tcConnectptr);
8683         return;
8684       } else {
8685         jam();
8686 	/* ------------------------------------------------------------------
8687 	 * A NORMAL WRITE OPERATION ON A FRAGMENT WHICH DO NOT NEED LOGGING.
8688 	 * WE WILL PACK THE REQUEST/RESPONSE TO THE NEXT NODE/TO TC.
8689 	 * ------------------------------------------------------------------ */
8690         regTcPtr->logWriteState = TcConnectionrec::NOT_WRITTEN;
8691         packLqhkeyreqLab(signal, tcConnectptr);
8692         return;
8693       }//if
8694     } else {
8695       jam();
8696       /* --------------------------------------------------------------------
8697        * A DIRTY OPERATION WHICH NEEDS LOGGING. WE START BY LOGGING THE
8698        * REQUEST. IN THIS CASE WE WILL RELEASE THE FRAGMENT LOCK FIRST.
8699        * --------------------------------------------------------------------
8700        * A NORMAL WRITE OPERATION THAT NEEDS LOGGING AND WILL NOT BE
8701        * PREMATURELY COMMITTED.
8702        * -------------------------------------------------------------------- */
8703       logLqhkeyreqLab(signal, tcConnectptr);
8704       return;
8705     }//if
8706   }//if
8707 }//Dblqh::rwConcludedLab()
8708 
rwConcludedAiLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)8709 void Dblqh::rwConcludedAiLab(Signal* signal,
8710                              const TcConnectionrecPtr tcConnectptr)
8711 {
8712   TcConnectionrec * const regTcPtr = tcConnectptr.p;
8713   fragptr.i = regTcPtr->fragmentptr;
8714   /* ------------------------------------------------------------------------
8715    * WE HAVE NOW CONCLUDED READING/WRITING IN ACC AND TUP FOR THIS OPERATION.
8716    * IT IS NOW TIME TO LOG THE OPERATION, SEND REQUEST TO NEXT NODE OR TC AND
8717    * FOR SOME TYPES OF OPERATIONS IT IS EVEN TIME TO COMMIT THE OPERATION.
8718    * IN THIS CASE WE HAVE ALREADY RELEASED THE FRAGMENT LOCK.
8719    * ERROR CASES AT FRAGMENT CREATION AND STAND-BY NODES ARE THE REASONS FOR
8720    * COMING HERE.
8721    * ------------------------------------------------------------------------ */
8722   if (regTcPtr->operation == ZREAD) {
8723     if (regTcPtr->opSimple == 1) {
8724       jam();
8725       /* --------------------------------------------------------------------
8726        * THE OPERATION IS A SIMPLE READ. WE WILL IMMEDIATELY COMMIT THE
8727        * OPERATION.
8728        * -------------------------------------------------------------------- */
8729       localCommitLab(signal, tcConnectptr);
8730       return;
8731     } else {
8732       jam();
8733       /* --------------------------------------------------------------------
8734        * A NORMAL READ OPERATION IS NOT LOGGED BUT IS NOT COMMITTED UNTIL
8735        * THE COMMIT SIGNAL ARRIVES. THUS WE CONTINUE PACKING THE RESPONSE.
8736        * -------------------------------------------------------------------- */
8737       c_fragment_pool.getPtr(fragptr);
8738       packLqhkeyreqLab(signal, tcConnectptr);
8739       return;
8740     }//if
8741   } else {
8742     jam();
8743     c_fragment_pool.getPtr(fragptr);
8744     if (fragptr.p->logFlag == Fragrecord::STATE_FALSE) {
8745       if (regTcPtr->dirtyOp == ZTRUE) {
8746 	/* ------------------------------------------------------------------
8747 	 * THIS OPERATION WAS A WRITE OPERATION THAT DO NOT NEED LOGGING AND
8748 	 * THAT CAN CAN  BE COMMITTED IMMEDIATELY.
8749 	 * ----------------------------------------------------------------- */
8750         jam();
8751 	/* ----------------------------------------------------------------
8752 	 * IT MUST BE ACTIVE CREATION OF A FRAGMENT.
8753 	 * ---------------------------------------------------------------- */
8754         localCommitLab(signal, tcConnectptr);
8755         return;
8756       } else {
8757 	/* ------------------------------------------------------------------
8758 	 * A NORMAL WRITE OPERATION ON A FRAGMENT WHICH DO NOT NEED LOGGING.
8759 	 * WE WILL PACK THE REQUEST/RESPONSE TO THE NEXT NODE/TO TC.
8760 	 * ------------------------------------------------------------------ */
8761         jam();
8762 	  /* ---------------------------------------------------------------
8763 	   * IT MUST BE ACTIVE CREATION OF A FRAGMENT.
8764 	   * NOT A DIRTY OPERATION THUS PACK REQUEST/RESPONSE.
8765 	   * ---------------------------------------------------------------- */
8766         regTcPtr->logWriteState = TcConnectionrec::NOT_WRITTEN;
8767         packLqhkeyreqLab(signal, tcConnectptr);
8768         return;
8769       }//if
8770     } else {
8771       jam();
8772       /* --------------------------------------------------------------------
8773        * A DIRTY OPERATION WHICH NEEDS LOGGING. WE START BY LOGGING THE
8774        * REQUEST. IN THIS CASE WE WILL RELEASE THE FRAGMENT LOCK FIRST.
8775        * -------------------------------------------------------------------- */
8776       /* A NORMAL WRITE OPERATION THAT NEEDS LOGGING AND WILL NOT BE
8777        * PREMATURELY COMMITTED.
8778        * -------------------------------------------------------------------- */
8779       logLqhkeyreqLab(signal, tcConnectptr);
8780       return;
8781     }//if
8782   }//if
8783 }//Dblqh::rwConcludedAiLab()
8784 
8785 /* ##########################################################################
8786  * #######                            LOG MODULE                      #######
8787  *
8788  * ##########################################################################
8789  * --------------------------------------------------------------------------
8790  *       THE LOG MODULE HANDLES THE READING AND WRITING OF THE LOG
8791  *       IT IS ALSO RESPONSIBLE FOR HANDLING THE SYSTEM RESTART.
8792  *       IT CONTROLS THE SYSTEM RESTART IN TUP AND ACC AS WELL.
8793  * -------------------------------------------------------------------------- */
logLqhkeyreqLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)8794 void Dblqh::logLqhkeyreqLab(Signal* signal,
8795                             const TcConnectionrecPtr tcConnectptr)
8796 {
8797   UintR tcurrentFilepage;
8798   TcConnectionrecPtr tmpTcConnectptr;
8799 
8800   const bool out_of_log_buffer = cnoOfLogPages < ZMIN_LOG_PAGES_OPERATION;
8801 
8802   TcConnectionrec * const regTcPtr = tcConnectptr.p;
8803   logPartPtr.i = regTcPtr->m_log_part_ptr_i;
8804   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
8805   bool abort_on_redo_problems =
8806     (LqhKeyReq::getQueueOnRedoProblemFlag(regTcPtr->reqinfo) == 0);
8807 
8808 /* -------------------------------------------------- */
8809 /*       THIS PART IS USED TO WRITE THE LOG           */
8810 /* -------------------------------------------------- */
8811 /* -------------------------------------------------- */
8812 /*       CHECK IF A LOG OPERATION IS ONGOING ALREADY. */
8813 /*       IF SO THEN QUEUE THE OPERATION FOR LATER     */
8814 /*       RESTART WHEN THE LOG PART IS FREE AGAIN.     */
8815 /* -------------------------------------------------- */
8816   LogPartRecord * const regLogPartPtr = logPartPtr.p;
8817   const bool problem = out_of_log_buffer || regLogPartPtr->m_log_problems != 0;
8818   if (unlikely(problem || ERROR_INSERTED(5083) || ERROR_INSERTED(5032)))
8819   {
8820     /* -----------------------------------------------------------------*/
8821     /* P_TAIL_PROBLEM indicates that the redo log is full. If redo      */
8822     /* log writes are queued in this situation, they will have to wait  */
8823     /* until redo space is freed. Redo space will not be freed          */
8824     /* until the next LCP completes, which can take a long time. To     */
8825     /* avoid long waits and timeouts, redo log writes are aborted       */
8826     /* in case of a P_TAIL_PROBLEM.                                     */
8827     /* -----------------------------------------------------------------*/
8828     if (abort_on_redo_problems ||
8829         regLogPartPtr->m_log_problems & LogPartRecord::P_TAIL_PROBLEM ||
8830         ERROR_INSERTED(5032))
8831     {
8832       jam();
8833       if (ERROR_INSERTED_CLEAR(5032))
8834       {
8835         const Uint32 saved_cnoOfLogPages = cnoOfLogPages;
8836         // simulate abort on temporary out-of-redo error
8837         cnoOfLogPages = ZMIN_LOG_PAGES_OPERATION - 1;
8838         logLqhkeyreqLab_problems(signal, tcConnectptr);
8839         cnoOfLogPages = saved_cnoOfLogPages;
8840         return;
8841       }
8842       logLqhkeyreqLab_problems(signal, tcConnectptr);
8843       return;
8844     }
8845     else
8846     {
8847       jam();
8848       goto queueop;
8849     }
8850   }
8851 
8852   if (regLogPartPtr->logPartState == LogPartRecord::IDLE)
8853   {
8854     ;
8855   }
8856   else if (regLogPartPtr->logPartState == LogPartRecord::ACTIVE)
8857   {
8858 queueop:
8859     jam();
8860     linkWaitLog(signal, logPartPtr, logPartPtr.p->m_log_prepare_queue, tcConnectptr);
8861     regTcPtr->transactionState = TcConnectionrec::LOG_QUEUED;
8862     return;
8863   }
8864   else
8865   {
8866     ndbabort();
8867     return;
8868   }//if
8869 
8870   increment_committed_mbytes(regLogPartPtr,
8871                              regTcPtr);
8872   logFilePtr.i = regLogPartPtr->currentLogfile;
8873   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
8874 /* -------------------------------------------------- */
8875 /*       CHECK IF A NEW MBYTE IS TO BE STARTED. IF    */
8876 /*       SO INSERT A NEXT LOG RECORD, WRITE THE LOG   */
8877 /*       AND PLACE THE LOG POINTER ON THE NEW POSITION*/
8878 /*       IF A NEW FILE IS TO BE USED, CHANGE FILE AND */
8879 /*       ALSO START OPENING THE NEXT LOG FILE. IF A   */
8880 /*       LAP HAS BEEN COMPLETED THEN ADD ONE TO LAP   */
8881 /*       COUNTER.                                     */
8882 /* -------------------------------------------------- */
8883   checkNewMbyte(signal, tcConnectptr.p);
8884 /* -------------------------------------------------- */
8885 /*       INSERT THE OPERATION RECORD LAST IN THE LIST */
8886 /*       OF NOT COMPLETED OPERATIONS. ALSO RECORD THE */
8887 /*       FILE NO, PAGE NO AND PAGE INDEX OF THE START */
8888 /*       OF THIS LOG RECORD.                          */
8889 /*       IT IS NOT ALLOWED TO INSERT IT INTO THE LIST */
8890 /*       BEFORE CHECKING THE NEW MBYTE SINCE THAT WILL*/
8891 /*       CAUSE THE OLD VALUES OF TC_CONNECTPTR TO BE  */
8892 /*       USED IN WRITE_FILE_DESCRIPTOR.               */
8893 /* -------------------------------------------------- */
8894   Uint32 tcIndex = tcConnectptr.i;
8895   tmpTcConnectptr.i = regLogPartPtr->lastLogTcrec;
8896   regLogPartPtr->lastLogTcrec = tcIndex;
8897   if (tmpTcConnectptr.i == RNIL) {
8898     jam();
8899     regLogPartPtr->firstLogTcrec = tcIndex;
8900   } else {
8901     ndbrequire(tcConnect_pool.getValidPtr(tmpTcConnectptr));
8902     tmpTcConnectptr.p->nextLogTcrec = tcIndex;
8903   }//if
8904   Uint32 fileNo = logFilePtr.p->fileNo;
8905   tcurrentFilepage = logFilePtr.p->currentFilepage;
8906   logPagePtr.i = logFilePtr.p->currentLogpage;
8907   regTcPtr->nextLogTcrec = RNIL;
8908   regTcPtr->prevLogTcrec = tmpTcConnectptr.i;
8909   ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
8910   Uint32 pageIndex = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
8911   regTcPtr->logStartFileNo = fileNo;
8912   regTcPtr->logStartPageNo = tcurrentFilepage;
8913   regTcPtr->logStartPageIndex = pageIndex;
8914 /* -------------------------------------------------- */
8915 /*       WRITE THE LOG HEADER OF THIS OPERATION.      */
8916 /* -------------------------------------------------- */
8917   writeLogHeader(signal, tcConnectptr.p, regLogPartPtr);
8918 /* -------------------------------------------------- */
8919 /*       WRITE THE TUPLE KEY OF THIS OPERATION.       */
8920 /* -------------------------------------------------- */
8921   writeKey(signal, tcConnectptr.p);
8922 /* -------------------------------------------------- */
8923 /*       WRITE THE ATTRIBUTE INFO OF THIS OPERATION.  */
8924 /* -------------------------------------------------- */
8925   writeAttrinfoLab(signal, tcConnectptr.p);
8926 
8927 /* -------------------------------------------------- */
8928 /*       RESET THE STATE OF THE LOG PART. IF ANY      */
8929 /*       OPERATIONS HAVE QUEUED THEN START THE FIRST  */
8930 /*       OF THESE.                                    */
8931 /* -------------------------------------------------- */
8932 /* -------------------------------------------------- */
8933 /*       CONTINUE WITH PACKING OF LQHKEYREQ           */
8934 /* -------------------------------------------------- */
8935   tcurrentFilepage = logFilePtr.p->currentFilepage;
8936   if (logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] == ZPAGE_HEADER_SIZE) {
8937     jam();
8938     tcurrentFilepage--;
8939   }//if
8940   regTcPtr->logStopPageNo = tcurrentFilepage;
8941   regTcPtr->logWriteState = TcConnectionrec::WRITTEN;
8942   if (regTcPtr->abortState != TcConnectionrec::ABORT_IDLE) {
8943 /* -------------------------------------------------- */
8944 /*       AN ABORT HAVE BEEN ORDERED. THE ABORT WAITED */
8945 /*       FOR THE LOG WRITE TO BE COMPLETED. NOW WE    */
8946 /*       CAN PROCEED WITH THE NORMAL ABORT HANDLING.  */
8947 /* -------------------------------------------------- */
8948     jam();
8949     abortCommonLab(signal, tcConnectptr);
8950     return;
8951   }//if
8952   if (regTcPtr->dirtyOp != ZTRUE) {
8953     packLqhkeyreqLab(signal, tcConnectptr);
8954   } else {
8955     jam();
8956     /* ----------------------------------------------------------------------
8957      * I NEED TO INSERT A COMMIT LOG RECORD SINCE WE ARE WRITING LOG IN THIS
8958      * TRANSACTION. SINCE WE RELEASED THE LOG LOCK JUST NOW NO ONE ELSE CAN BE
8959      * ACTIVE IN WRITING THE LOG. WE THUS WRITE THE LOG WITHOUT GETTING A LOCK
8960      * SINCE WE ARE ONLY WRITING A COMMIT LOG RECORD.
8961      * ---------------------------------------------------------------------- */
8962     writeCommitLog(signal, logPartPtr, tcConnectptr.p);
8963     /* ----------------------------------------------------------------------
8964      * DIRTY OPERATIONS SHOULD COMMIT BEFORE THEY PACK THE REQUEST/RESPONSE.
8965      * ---------------------------------------------------------------------- */
8966     localCommitLab(signal, tcConnectptr);
8967   }//if
8968 }//Dblqh::logLqhkeyreqLab()
8969 
8970 void
logLqhkeyreqLab_problems(Signal * signal,const TcConnectionrecPtr tcConnectptr)8971 Dblqh::logLqhkeyreqLab_problems(Signal * signal,
8972                                 const TcConnectionrecPtr tcConnectptr)
8973 {
8974   jam();
8975   LogPartRecord * const regLogPartPtr = logPartPtr.p;
8976   Uint32 problems = regLogPartPtr->m_log_problems;
8977 
8978   if (cnoOfLogPages < ZMIN_LOG_PAGES_OPERATION)
8979   {
8980     jam();
8981     terrorCode = ZTEMPORARY_REDO_LOG_FAILURE;
8982   }
8983   else if ((problems & LogPartRecord::P_TAIL_PROBLEM) != 0)
8984   {
8985     jam();
8986     terrorCode = ZTAIL_PROBLEM_IN_LOG_ERROR;
8987   }
8988   else if ((problems & LogPartRecord::P_REDO_IO_PROBLEM) != 0)
8989   {
8990     jam();
8991     terrorCode = ZREDO_IO_PROBLEM;
8992   }
8993   else if ((problems & LogPartRecord::P_FILE_CHANGE_PROBLEM) != 0)
8994   {
8995     jam();
8996     terrorCode = ZFILE_CHANGE_PROBLEM_IN_LOG_ERROR;
8997   }
8998   else
8999   {
9000     if (ERROR_INSERTED(5083))
9001     {
9002       terrorCode = 266;
9003     }
9004   }
9005   abortErrorLab(signal, tcConnectptr);
9006 }
9007 
9008 void
update_log_problem(Signal * signal,Ptr<LogPartRecord> partPtr,Uint32 problem,bool value)9009 Dblqh::update_log_problem(Signal* signal, Ptr<LogPartRecord> partPtr,
9010                           Uint32 problem, bool value)
9011 {
9012   Uint32 problems = partPtr.p->m_log_problems;
9013   if (value)
9014   {
9015     /**
9016      * set
9017      */
9018     jam();
9019     if ((problems & problem) == 0)
9020     {
9021       jam();
9022       problems |= problem;
9023     }
9024   }
9025   else
9026   {
9027     /**
9028      * clear
9029      */
9030     jam();
9031     if ((problems & problem) != 0)
9032     {
9033       jam();
9034       problems &= ~(Uint32)problem;
9035 
9036       if (partPtr.p->LogLqhKeyReqSent == ZFALSE &&
9037           (!partPtr.p->m_log_prepare_queue.isEmpty() ||
9038            !partPtr.p->m_log_complete_queue.isEmpty()))
9039       {
9040         jam();
9041 
9042         partPtr.p->LogLqhKeyReqSent = ZTRUE;
9043         signal->theData[0] = ZLOG_LQHKEYREQ;
9044         signal->theData[1] = partPtr.i;
9045         sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
9046       }
9047     }
9048   }
9049   partPtr.p->m_log_problems = problems;
9050 }
9051 
9052 /* ------------------------------------------------------------------------- */
9053 /* -------                        SEND LQHKEYREQ                             */
9054 /*                                                                           */
9055 /* NO STATE CHECKING SINCE THE SIGNAL IS A LOCAL SIGNAL. THE EXECUTION OF    */
9056 /* THE OPERATION IS COMPLETED. IT IS NOW TIME TO SEND THE OPERATION TO THE   */
9057 /* NEXT REPLICA OR TO TC.                                                    */
9058 /* ------------------------------------------------------------------------- */
packLqhkeyreqLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)9059 void Dblqh::packLqhkeyreqLab(Signal* signal,
9060                              const TcConnectionrecPtr tcConnectptr)
9061 {
9062   TcConnectionrec * const regTcPtr = tcConnectptr.p;
9063   if (regTcPtr->nextReplica == ZNIL) {
9064 /* ------------------------------------------------------------------------- */
9065 /* -------               SEND LQHKEYCONF                             ------- */
9066 /*                                                                           */
9067 /* ------------------------------------------------------------------------- */
9068     sendLqhkeyconfTc(signal, regTcPtr->tcBlockref, tcConnectptr);
9069     if (! (regTcPtr->dirtyOp ||
9070            (regTcPtr->operation == ZREAD && regTcPtr->opSimple)))
9071     {
9072       jamDebug();
9073       regTcPtr->transactionState = TcConnectionrec::PREPARED;
9074       releaseOprec(signal, tcConnectptr);
9075     } else {
9076       jamDebug();
9077 
9078 /*************************************************************>*/
9079 /*       DIRTY WRITES ARE USED IN TWO SITUATIONS. THE FIRST    */
9080 /*       SITUATION IS WHEN THEY ARE USED TO UPDATE COUNTERS AND*/
9081 /*       OTHER ATTRIBUTES WHICH ARE NOT SENSITIVE TO CONSISTE- */
9082 /*       NCY. THE SECOND SITUATION IS BY OPERATIONS THAT ARE   */
9083 /*       SENT AS PART OF A COPY FRAGMENT PROCESS.              */
9084 /*                                                             */
9085 /*       DURING A COPY FRAGMENT PROCESS THERE IS NO LOGGING    */
9086 /*       ONGOING SINCE THE FRAGMENT IS NOT COMPLETE YET. THE   */
9087 /*       LOGGING STARTS AFTER COMPLETING THE LAST COPY TUPLE   */
9088 /*       OPERATION. THE EXECUTION OF THE LAST COPY TUPLE DOES  */
9089 /*       ALSO START A LOCAL CHECKPOINT SO THAT THE FRAGMENT    */
9090 /*       REPLICA IS RECOVERABLE. THUS GLOBAL CHECKPOINT ID FOR */
9091 /*       THOSE OPERATIONS ARE NOT INTERESTING.                 */
9092 /*                                                             */
9093 /*       A DIRTY WRITE IS BY DEFINITION NOT CONSISTENT. THUS   */
9094 /*       IT CAN USE ANY GLOBAL CHECKPOINT. THE IDEA HERE IS TO */
9095 /*       ALWAYS USE THE LATEST DEFINED GLOBAL CHECKPOINT ID IN */
9096 /*       THIS NODE.                                            */
9097 /*************************************************************>*/
9098       cleanUp(signal, tcConnectptr);
9099     }//if
9100     return;
9101   }//if
9102 /* ------------------------------------------------------------------------- */
9103 /* -------            SEND LQHKEYREQ                                 ------- */
9104 /*                                                                           */
9105 /* ------------------------------------------------------------------------- */
9106 /* ------------------------------------------------------------------------- */
9107 /* THERE ARE MORE REPLICAS TO SEND THE OPERATION TO. A NEW LQHKEYREQ WILL BE */
9108 /* PREPARED FOR THE NEXT REPLICA.                                            */
9109 /* ------------------------------------------------------------------------- */
9110 /* CLEAR REPLICA TYPE, ATTRINFO INDICATOR (IN LQHKEYREQ),                    */
9111 /* INTERPRETED EXECUTION, SEQUENTIAL NUMBER OF REPLICA.                      */
9112 // Set bit indicating Client and TC record not the same.
9113 // Set readlenAi indicator if readlenAi != 0
9114 // Stored Procedure Indicator not set.
9115 /* ------------------------------------------------------------------------- */
9116   LqhKeyReq * const lqhKeyReq = (LqhKeyReq *)&signal->theData[0];
9117 
9118   UintR Treqinfo;
9119   UintR sig0, sig1, sig2, sig3, sig4, sig5, sig6;
9120   Treqinfo = preComputedRequestInfoMask & regTcPtr->reqinfo;
9121 
9122   jam();
9123   /* Send long LqhKeyReq to next replica if it can support it */
9124   bool sendLongReq= true;
9125   if (ERROR_INSERTED(5051))
9126   {
9127     sendLongReq = false;
9128   }
9129 
9130   UintR TAiLen = sendLongReq ?
9131     0 :
9132     MIN(regTcPtr->totSendlenAi, LqhKeyReq::MaxAttrInfo);
9133 
9134   /* Long LQHKeyReq uses section size for key length */
9135   Uint32 lqhKeyLen= sendLongReq?
9136     0 :
9137     regTcPtr->primKeyLen;
9138 
9139   UintR TapplAddressIndicator = (regTcPtr->nextSeqNoReplica == 0 ? 0 : 1);
9140   LqhKeyReq::setApplicationAddressFlag(Treqinfo, TapplAddressIndicator);
9141   LqhKeyReq::setInterpretedFlag(Treqinfo, regTcPtr->opExec);
9142   LqhKeyReq::setSeqNoReplica(Treqinfo, regTcPtr->nextSeqNoReplica);
9143   LqhKeyReq::setAIInLqhKeyReq(Treqinfo, TAiLen);
9144   LqhKeyReq::setKeyLen(Treqinfo,lqhKeyLen);
9145 
9146   regTcPtr->m_use_rowid |=
9147     fragptr.p->m_copy_started_state == Fragrecord::AC_NR_COPY;
9148   LqhKeyReq::setRowidFlag(Treqinfo, regTcPtr->m_use_rowid);
9149 
9150   if (LqhKeyReq::getRowidFlag(Treqinfo))
9151   {
9152     //ndbassert(LqhKeyReq::getOperation(Treqinfo) == ZINSERT);
9153   }
9154   else
9155   {
9156 #ifdef VM_TRACE
9157     if (fragptr.p->m_copy_started_state != Fragrecord::AC_IGNORED)
9158     {
9159       Uint32 nextNodeId = regTcPtr->nextReplica;
9160       ndbassert(LqhKeyReq::getOperation(Treqinfo) != ZINSERT ||
9161                 get_node_status(nextNodeId) != ZNODE_UP);
9162     }
9163 #endif
9164   }
9165 
9166   UintR TreadLenAiInd = (regTcPtr->readlenAi == 0 ? 0 : 1);
9167   UintR TsameLqhAndClient = (tcConnectptr.i ==
9168                              regTcPtr->tcOprec ? 0 : 1);
9169   LqhKeyReq::setSameClientAndTcFlag(Treqinfo, TsameLqhAndClient);
9170   LqhKeyReq::setReturnedReadLenAIFlag(Treqinfo, TreadLenAiInd);
9171 
9172   /* Long LQHKeyReq uses section size for AttrInfo length */
9173   UintR TotReclenAi = sendLongReq ?
9174     0 :
9175     regTcPtr->totSendlenAi;
9176 
9177   LqhKeyReq::setReorgFlag(TotReclenAi, regTcPtr->m_reorg);
9178 
9179 /* ------------------------------------------------------------------------- */
9180 /* WE ARE NOW PREPARED TO SEND THE LQHKEYREQ. WE HAVE TO DECIDE IF ATTRINFO  */
9181 /* IS INCLUDED IN THE LQHKEYREQ SIGNAL AND THEN SEND IT.                     */
9182 /* TAKE OVER SCAN OPERATION IS NEVER USED ON BACKUPS, LOG RECORDS AND START-UP*/
9183 /* OF NEW REPLICA AND THUS ONLY TOT_SENDLEN_AI IS USED THE UPPER 16 BITS ARE */
9184 /* ZERO.                                                                     */
9185 /* ------------------------------------------------------------------------- */
9186   sig0 = tcConnectptr.i;
9187   sig1 = regTcPtr->savePointId;
9188   sig2 = regTcPtr->hashValue;
9189   sig4 = regTcPtr->tcBlockref;
9190 
9191   lqhKeyReq->clientConnectPtr = sig0;
9192   lqhKeyReq->attrLen = TotReclenAi;
9193   lqhKeyReq->savePointId = sig1;
9194   lqhKeyReq->hashValue = sig2;
9195   lqhKeyReq->requestInfo = Treqinfo;
9196   lqhKeyReq->tcBlockref = sig4;
9197 
9198   sig0 = regTcPtr->tableref + ((regTcPtr->schemaVersion << 16) & 0xFFFF0000);
9199   sig1 = regTcPtr->fragmentid + (regTcPtr->nodeAfterNext[0] << 16);
9200   sig2 = regTcPtr->transid[0];
9201   sig3 = regTcPtr->transid[1];
9202   sig4 = regTcPtr->applRef;
9203   sig5 = regTcPtr->applOprec;
9204   sig6 = regTcPtr->tcOprec;
9205   UintR nextPos = (TapplAddressIndicator << 1);
9206 
9207   lqhKeyReq->tableSchemaVersion = sig0;
9208   lqhKeyReq->fragmentData = sig1;
9209   lqhKeyReq->transId1 = sig2;
9210   lqhKeyReq->transId2 = sig3;
9211   lqhKeyReq->numFiredTriggers = regTcPtr->numFiredTriggers;
9212   lqhKeyReq->variableData[0] = sig4;
9213   lqhKeyReq->variableData[1] = sig5;
9214   lqhKeyReq->variableData[2] = sig6;
9215 
9216   nextPos += TsameLqhAndClient;
9217 
9218   if ((regTcPtr->lastReplicaNo - regTcPtr->nextSeqNoReplica) > 1) {
9219     sig0 = (UintR)regTcPtr->nodeAfterNext[1] +
9220            (UintR)(regTcPtr->nodeAfterNext[2] << 16);
9221     lqhKeyReq->variableData[nextPos] = sig0;
9222     nextPos++;
9223   }//if
9224   sig0 = regTcPtr->readlenAi;
9225   lqhKeyReq->variableData[nextPos] = sig0;
9226   nextPos += TreadLenAiInd;
9227 
9228   if (!sendLongReq)
9229   {
9230     /* Short LQHKEYREQ to older LQH
9231      * First few words of KeyInfo go into LQHKEYREQ
9232      * Sometimes have no Keyinfo
9233      */
9234     if (regTcPtr->primKeyLen != 0)
9235     {
9236       SegmentedSectionPtr keyInfoSection;
9237 
9238       ndbassert(regTcPtr->keyInfoIVal != RNIL);
9239 
9240       getSection(keyInfoSection, regTcPtr->keyInfoIVal);
9241       SectionReader keyInfoReader(keyInfoSection, g_sectionSegmentPool);
9242 
9243       UintR keyLenInLqhKeyReq= MIN(LqhKeyReq::MaxKeyInfo,
9244                                    regTcPtr->primKeyLen);
9245 
9246       keyInfoReader.getWords(&lqhKeyReq->variableData[nextPos],
9247                              keyLenInLqhKeyReq);
9248 
9249       nextPos+= keyLenInLqhKeyReq;
9250     }
9251   }
9252 
9253   sig0 = regTcPtr->gci_hi;
9254   Local_key tmp = regTcPtr->m_row_id;
9255 
9256   lqhKeyReq->variableData[nextPos + 0] = tmp.m_page_no;
9257   lqhKeyReq->variableData[nextPos + 1] = tmp.m_page_idx;
9258   nextPos += 2*LqhKeyReq::getRowidFlag(Treqinfo);
9259 
9260   lqhKeyReq->variableData[nextPos + 0] = sig0;
9261   nextPos += LqhKeyReq::getGCIFlag(Treqinfo);
9262 
9263   // pass full instance key for remote to map to real instance
9264   BlockReference lqhRef = numberToRef(DBLQH,
9265                                       fragptr.p->lqhInstanceKey,
9266                                       regTcPtr->nextReplica);
9267 
9268   if (likely(sendLongReq))
9269   {
9270     /* Long LQHKEYREQ, attach KeyInfo and AttrInfo
9271      * sections to signal
9272      */
9273     SectionHandle handle(this);
9274     handle.m_cnt= 0;
9275 
9276     if (regTcPtr->primKeyLen > 0)
9277     {
9278       SegmentedSectionPtr keyInfoSection;
9279 
9280       ndbassert(regTcPtr->keyInfoIVal != RNIL);
9281       getSection(keyInfoSection, regTcPtr->keyInfoIVal);
9282 
9283       handle.m_ptr[ LqhKeyReq::KeyInfoSectionNum ]= keyInfoSection;
9284       handle.m_cnt= 1;
9285 
9286       if (regTcPtr->totSendlenAi > 0)
9287       {
9288         SegmentedSectionPtr attrInfoSection;
9289 
9290         ndbassert(regTcPtr->attrInfoIVal != RNIL);
9291         getSection(attrInfoSection, regTcPtr->attrInfoIVal);
9292 
9293         handle.m_ptr[ LqhKeyReq::AttrInfoSectionNum ]= attrInfoSection;
9294         handle.m_cnt= 2;
9295       }
9296       else
9297       {
9298         /* No AttrInfo to be sent on.  This can occur for delete
9299          * or with an interpreted update when no actual update
9300          * is made
9301          * In this case, we free any attrInfo section now.
9302          */
9303         if (regTcPtr->attrInfoIVal != RNIL)
9304         {
9305           ndbassert(!( regTcPtr->m_flags &
9306                        TcConnectionrec::OP_SAVEATTRINFO));
9307           releaseSection(regTcPtr->attrInfoIVal);
9308           regTcPtr->attrInfoIVal= RNIL;
9309         }
9310       }
9311     }
9312     else
9313     {
9314       /* Zero-length primary key, better not have any
9315        * AttrInfo
9316        */
9317       ndbrequire(regTcPtr->totSendlenAi == 0);
9318       ndbrequire(regTcPtr->keyInfoIVal == RNIL);
9319       ndbrequire(regTcPtr->attrInfoIVal == RNIL);
9320       ndbrequire(LqhKeyReq::getRowidFlag(Treqinfo));
9321       ndbrequire(LqhKeyReq::getNrCopyFlag(Treqinfo));
9322     }
9323 
9324     sendBatchedLqhkeyreq(signal, lqhRef, LqhKeyReq::FixedSignalLength + nextPos, &handle);
9325 
9326     /* Long sections were freed as part of sendSignal */
9327     ndbassert( handle.m_cnt == 0);
9328     regTcPtr->keyInfoIVal= RNIL;
9329     regTcPtr->attrInfoIVal= RNIL;
9330   }
9331   else
9332   {
9333     /* Short LQHKEYREQ to older LQH
9334      * First few words of ATTRINFO go into LQHKEYREQ
9335      * (if they fit)
9336      */
9337     if (TAiLen > 0)
9338     {
9339       if (likely(nextPos + TAiLen + LqhKeyReq::FixedSignalLength <= 25))
9340       {
9341         jam();
9342         SegmentedSectionPtr attrInfoSection;
9343 
9344         ndbassert(regTcPtr->attrInfoIVal != RNIL);
9345 
9346         getSection(attrInfoSection, regTcPtr->attrInfoIVal);
9347         SectionReader attrInfoReader(attrInfoSection, getSectionSegmentPool());
9348 
9349         attrInfoReader.getWords(&lqhKeyReq->variableData[nextPos],
9350                                 TAiLen);
9351 
9352         nextPos+= TAiLen;
9353       }
9354       else
9355       {
9356         /* Not enough space in LQHKEYREQ, we'll send everything in
9357          * separate ATTRINFO signals
9358          */
9359         Treqinfo &= ~(Uint32)(LqhKeyReq::RI_AI_IN_THIS_MASK << LqhKeyReq::RI_AI_IN_THIS_SHIFT);
9360         lqhKeyReq->requestInfo = Treqinfo;
9361         TAiLen= 0;
9362       }
9363     }
9364 
9365     sendSignal(lqhRef, GSN_LQHKEYREQ, signal,
9366                nextPos + LqhKeyReq::FixedSignalLength, JBB);
9367 
9368     /* Send extra KeyInfo signals if necessary... */
9369     if (regTcPtr->primKeyLen > LqhKeyReq::MaxKeyInfo) {
9370       jam();
9371       sendTupkey(signal, tcConnectptr.p);
9372     }//if
9373 
9374     /* Send extra AttrInfo signals if necessary... */
9375     Uint32 remainingAiLen= regTcPtr->totSendlenAi - TAiLen;
9376 
9377     if (remainingAiLen != 0)
9378     {
9379       sig0 = regTcPtr->tcOprec;
9380       sig1 = regTcPtr->transid[0];
9381       sig2 = regTcPtr->transid[1];
9382       signal->theData[0] = sig0;
9383       signal->theData[1] = sig1;
9384       signal->theData[2] = sig2;
9385 
9386       SectionReader attrInfoReader(regTcPtr->attrInfoIVal,
9387                                    g_sectionSegmentPool);
9388 
9389       ndbassert(attrInfoReader.getSize() == regTcPtr->totSendlenAi);
9390 
9391       /* Step over words already sent in LQHKEYREQ above */
9392       attrInfoReader.step(TAiLen);
9393 
9394       while (remainingAiLen != 0)
9395       {
9396         Uint32 dataInSignal= MIN(AttrInfo::DataLength, remainingAiLen);
9397         attrInfoReader.getWords(&signal->theData[3],
9398                                 dataInSignal);
9399         remainingAiLen-= dataInSignal;
9400         sendSignal(lqhRef, GSN_ATTRINFO, signal,
9401                    AttrInfo::HeaderLength + dataInSignal, JBB);
9402       }
9403     }
9404   }
9405 
9406   /* LQHKEYREQ sent */
9407 
9408   regTcPtr->transactionState = TcConnectionrec::PREPARED;
9409   if (regTcPtr->dirtyOp == ZTRUE) {
9410     jam();
9411 /*************************************************************>*/
9412 /*       DIRTY WRITES ARE USED IN TWO SITUATIONS. THE FIRST    */
9413 /*       SITUATION IS WHEN THEY ARE USED TO UPDATE COUNTERS AND*/
9414 /*       OTHER ATTRIBUTES WHICH ARE NOT SENSITIVE TO CONSISTE- */
9415 /*       NCY. THE SECOND SITUATION IS BY OPERATIONS THAT ARE   */
9416 /*       SENT AS PART OF A COPY FRAGMENT PROCESS.              */
9417 /*                                                             */
9418 /*       DURING A COPY FRAGMENT PROCESS THERE IS NO LOGGING    */
9419 /*       ONGOING SINCE THE FRAGMENT IS NOT COMPLETE YET. THE   */
9420 /*       LOGGING STARTS AFTER COMPLETING THE LAST COPY TUPLE   */
9421 /*       OPERATION. THE EXECUTION OF THE LAST COPY TUPLE DOES  */
9422 /*       ALSO START A LOCAL CHECKPOINT SO THAT THE FRAGMENT    */
9423 /*       REPLICA IS RECOVERABLE. THUS GLOBAL CHECKPOINT ID FOR */
9424 /*       THOSE OPERATIONS ARE NOT INTERESTING.                 */
9425 /*                                                             */
9426 /*       A DIRTY WRITE IS BY DEFINITION NOT CONSISTENT. THUS   */
9427 /*       IT CAN USE ANY GLOBAL CHECKPOINT. THE IDEA HERE IS TO */
9428 /*       ALWAYS USE THE LATEST DEFINED GLOBAL CHECKPOINT ID IN */
9429 /*       THIS NODE.                                            */
9430 /*************************************************************>*/
9431     cleanUp(signal, tcConnectptr);
9432     return;
9433   }//if
9434   /* ------------------------------------------------------------------------
9435    *   ALL INFORMATION NEEDED BY THE COMMIT PHASE AND COMPLETE PHASE IS
9436    *   KEPT IN THE TC_CONNECT RECORD. TO ENSURE PROPER USE OF MEMORY
9437    *   RESOURCES WE DEALLOCATE THE ATTRINFO RECORD AND KEY RECORDS
9438    *   AS SOON AS POSSIBLE.
9439    * ------------------------------------------------------------------------ */
9440   releaseOprec(signal, tcConnectptr);
9441 }//Dblqh::packLqhkeyreqLab()
9442 
9443 /* ========================================================================= */
9444 /* ==== CHECK IF THE LOG RECORD FITS INTO THE CURRENT MBYTE,         ======= */
9445 /*      OTHERWISE SWITCH TO NEXT MBYTE.                                      */
9446 /*                                                                           */
9447 /* ========================================================================= */
checkNewMbyte(Signal * signal,const TcConnectionrec * regTcPtr)9448 void Dblqh::checkNewMbyte(Signal* signal, const TcConnectionrec* regTcPtr)
9449 {
9450   UintR tcnmTmp;
9451   UintR ttotalLogSize;
9452 
9453 /* -------------------------------------------------- */
9454 /*       CHECK IF A NEW MBYTE OF LOG RECORD IS TO BE  */
9455 /*       OPENED BEFORE WRITING THE LOG RECORD. NO LOG */
9456 /*       RECORDS ARE ALLOWED TO SPAN A MBYTE BOUNDARY */
9457 /*                                                    */
9458 /*       INPUT:  TC_CONNECTPTR   THE OPERATION        */
9459 /*               LOG_FILE_PTR    THE LOG FILE         */
9460 /*       OUTPUT: LOG_FILE_PTR    THE NEW LOG FILE     */
9461 /* -------------------------------------------------- */
9462   ttotalLogSize = ZLOG_HEAD_SIZE + regTcPtr->currTupAiLen;
9463   ttotalLogSize = ttotalLogSize + regTcPtr->primKeyLen;
9464   tcnmTmp = logFilePtr.p->remainingWordsInMbyte;
9465   if ((ttotalLogSize + ZNEXT_LOG_SIZE) <= tcnmTmp) {
9466     ndbrequire(tcnmTmp >= ttotalLogSize);
9467     logFilePtr.p->remainingWordsInMbyte = tcnmTmp - ttotalLogSize;
9468     return;
9469   } else {
9470     jam();
9471 /* -------------------------------------------------- */
9472 /*       IT WAS NOT ENOUGH SPACE IN THIS MBYTE FOR    */
9473 /*       THIS LOG RECORD. MOVE TO NEXT MBYTE          */
9474 /*       THIS MIGHT INCLUDE CHANGING LOG FILE         */
9475 /* -------------------------------------------------- */
9476 /*       WE HAVE TO INSERT A NEXT LOG RECORD FIRST    */
9477 /* -------------------------------------------------- */
9478 /*       THEN CONTINUE BY WRITING THE FILE DESCRIPTORS*/
9479 /* -------------------------------------------------- */
9480     logPagePtr.i = logFilePtr.p->currentLogpage;
9481     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
9482     changeMbyte(signal);
9483     tcnmTmp = logFilePtr.p->remainingWordsInMbyte;
9484   }//if
9485   ndbrequire(tcnmTmp >= ttotalLogSize);
9486   logFilePtr.p->remainingWordsInMbyte = tcnmTmp - ttotalLogSize;
9487 }//Dblqh::checkNewMbyte()
9488 
9489 /* --------------------------------------------------------------------------
9490  * -------               WRITE OPERATION HEADER TO LOG                -------
9491  *
9492  *       SUBROUTINE SHORT NAME: WLH
9493  * ------------------------------------------------------------------------- */
writeLogHeader(Signal * signal,const TcConnectionrec * regTcPtr,LogPartRecord * regLogPartPtr)9494 void Dblqh::writeLogHeader(Signal* signal,
9495                            const TcConnectionrec* regTcPtr,
9496                            LogPartRecord* regLogPartPtr)
9497 {
9498   Uint32 keyLen = regTcPtr->primKeyLen;
9499   Uint32 aiLen = regTcPtr->currTupAiLen;
9500   Local_key rowid = regTcPtr->m_row_id;
9501   Uint32 totLogLen = ZLOG_HEAD_SIZE + aiLen + keyLen;
9502   Uint32 logPos = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
9503   Uint32 hashValue = regTcPtr->hashValue;
9504   Uint32 operation = regTcPtr->operation;
9505   regLogPartPtr->m_total_written_words += totLogLen;
9506 
9507   if ((logPos + ZLOG_HEAD_SIZE) < ZPAGE_SIZE) {
9508     Uint32* dataPtr = &logPagePtr.p->logPageWord[logPos];
9509     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPos + ZLOG_HEAD_SIZE;
9510     dataPtr[0] = ZPREP_OP_TYPE;
9511     dataPtr[1] = totLogLen;
9512     dataPtr[2] = hashValue;
9513     dataPtr[3] = operation;
9514     dataPtr[4] = aiLen;
9515     dataPtr[5] = keyLen;
9516     dataPtr[6] = rowid.m_page_no;
9517     dataPtr[7] = rowid.m_page_idx;
9518   } else {
9519     writeLogWord(signal, ZPREP_OP_TYPE);
9520     writeLogWord(signal, totLogLen);
9521     writeLogWord(signal, hashValue);
9522     writeLogWord(signal, operation);
9523     writeLogWord(signal, aiLen);
9524     writeLogWord(signal, keyLen);
9525     writeLogWord(signal, rowid.m_page_no);
9526     writeLogWord(signal, rowid.m_page_idx);
9527   }//if
9528 }//Dblqh::writeLogHeader()
9529 
9530 /* --------------------------------------------------------------------------
9531  * -------               WRITE TUPLE KEY TO LOG                       -------
9532  *
9533  *       SUBROUTINE SHORT NAME: WK
9534  * ------------------------------------------------------------------------- */
writeKey(Signal * signal,const TcConnectionrec * regTcPtr)9535 void Dblqh::writeKey(Signal* signal, const TcConnectionrec* regTcPtr)
9536 {
9537   SectionReader keyInfoReader(regTcPtr->keyInfoIVal,
9538                               g_sectionSegmentPool);
9539   const Uint32* srcPtr;
9540   Uint32 length;
9541   Uint32 wordsWritten= 0;
9542 
9543   /* Write contiguous chunks of words from the KeyInfo
9544    * section to the log
9545    */
9546   while (keyInfoReader.getWordsPtr(srcPtr,
9547                                    length))
9548   {
9549     writeLogWords(signal, srcPtr, length);
9550     wordsWritten+= length;
9551   }
9552 
9553   ndbassert( wordsWritten == regTcPtr->primKeyLen );
9554 }//Dblqh::writeKey()
9555 
9556 /* --------------------------------------------------------------------------
9557  * -------               WRITE ATTRINFO TO LOG                        -------
9558  *
9559  *       SUBROUTINE SHORT NAME: WA
9560  * ------------------------------------------------------------------------- */
writeAttrinfoLab(Signal * signal,const TcConnectionrec * regTcPtr)9561 void Dblqh::writeAttrinfoLab(Signal* signal, const TcConnectionrec* regTcPtr)
9562 {
9563   Uint32 totLen = regTcPtr->currTupAiLen;
9564   if (totLen == 0)
9565     return;
9566 
9567   jamDebug();
9568   ndbassert( regTcPtr->attrInfoIVal != RNIL );
9569   SectionReader attrInfoReader(regTcPtr->attrInfoIVal,
9570                                g_sectionSegmentPool);
9571   const Uint32* srcPtr;
9572   Uint32 length;
9573   Uint32 wordsWritten= 0;
9574 
9575   /* Write contiguous chunks of words from the
9576    * AttrInfo section to the log
9577    */
9578   while (attrInfoReader.getWordsPtr(srcPtr,
9579                                     length))
9580   {
9581     writeLogWords(signal, srcPtr, length);
9582     wordsWritten+= length;
9583   }
9584 
9585   ndbassert( wordsWritten == totLen );
9586 }//Dblqh::writeAttrinfoLab()
9587 
9588 /* ------------------------------------------------------------------------- */
9589 /* -------          SEND TUPLE KEY IN KEYINFO SIGNAL(S)              ------- */
9590 /*                                                                           */
9591 /*       SUBROUTINE SHORT NAME: STU                                          */
9592 /* ------------------------------------------------------------------------- */
sendTupkey(Signal * signal,const TcConnectionrec * regTcPtr)9593 void Dblqh::sendTupkey(Signal* signal, const TcConnectionrec* regTcPtr)
9594 {
9595   BlockReference lqhRef = 0;
9596   {
9597     // wl4391_todo fragptr
9598     FragrecordPtr Tfragptr;
9599     Tfragptr.i = regTcPtr->fragmentptr;
9600     c_fragment_pool.getPtr(Tfragptr);
9601     Uint32 Tnode = regTcPtr->nextReplica;
9602     Uint32 instanceKey = Tfragptr.p->lqhInstanceKey;
9603     lqhRef = numberToRef(DBLQH, instanceKey, Tnode);
9604   }
9605 
9606   signal->theData[0] = regTcPtr->tcOprec;
9607   signal->theData[1] = regTcPtr->transid[0];
9608   signal->theData[2] = regTcPtr->transid[1];
9609 
9610   Uint32 remainingLen= regTcPtr->primKeyLen -
9611     LqhKeyReq::MaxKeyInfo;
9612 
9613   SectionReader keyInfoReader(regTcPtr->keyInfoIVal,
9614                               g_sectionSegmentPool);
9615 
9616   ndbassert(keyInfoReader.getSize() > LqhKeyReq::MaxKeyInfo);
9617 
9618   /* Step over the words already sent in LQHKEYREQ */
9619   keyInfoReader.step(LqhKeyReq::MaxKeyInfo);
9620 
9621   while (remainingLen != 0)
9622   {
9623     Uint32 dataInSignal= MIN(KeyInfo::DataLength, remainingLen);
9624     keyInfoReader.getWords(&signal->theData[3],
9625                            dataInSignal);
9626     remainingLen-= dataInSignal;
9627     sendSignal(lqhRef, GSN_KEYINFO, signal,
9628                KeyInfo::HeaderLength + dataInSignal, JBB);
9629   }
9630 }//Dblqh::sendTupkey()
9631 
cleanUp(Signal * signal,TcConnectionrecPtr tcConnectptr)9632 void Dblqh::cleanUp(Signal* signal, TcConnectionrecPtr tcConnectptr)
9633 {
9634   releaseOprec(signal, tcConnectptr);
9635   deleteTransidHash(signal, tcConnectptr);
9636   releaseTcrec(signal, tcConnectptr);
9637 }//Dblqh::cleanUp()
9638 
releaseOprec(Signal * signal,TcConnectionrecPtr regTcPtr)9639 void Dblqh::releaseOprec(Signal* signal, TcConnectionrecPtr regTcPtr)
9640 {
9641   if (regTcPtr.p->m_dealloc_state != TcConnectionrec::DA_IDLE)
9642   {
9643     handleDeallocOp(signal, regTcPtr);
9644   }
9645 
9646   /* Release long sections if present */
9647   releaseSection(regTcPtr.p->keyInfoIVal);
9648   regTcPtr.p->keyInfoIVal = RNIL;
9649   releaseSection(regTcPtr.p->attrInfoIVal);
9650   regTcPtr.p->attrInfoIVal = RNIL;
9651 }//Dblqh::releaseOprec()
9652 
9653 /* ------------------------------------------------------------------------- */
9654 /* ------         DELETE TRANSACTION ID FROM HASH TABLE              ------- */
9655 /*                                                                           */
9656 /* ------------------------------------------------------------------------- */
deleteTransidHash(Signal * signal,TcConnectionrecPtr & tcConnectptr)9657 void Dblqh::deleteTransidHash(Signal* signal, TcConnectionrecPtr& tcConnectptr)
9658 {
9659   TcConnectionrec * const regTcPtr = tcConnectptr.p;
9660   TcConnectionrecPtr prevHashptr;
9661   TcConnectionrecPtr nextHashptr;
9662   /**
9663    * This operation has not been inserted in the hash list at all.
9664    * (It is a non-transactional 'dirtyOp', or the request failed
9665    *  before it was ever inserted in the hash list.)
9666    */
9667   if (regTcPtr->hashIndex == RNIL)
9668   {
9669     jamDebug();
9670     /* If this operation is 'non-dirty', there should be no duplicates */
9671     ndbassert(regTcPtr->dirtyOp == ZTRUE ||
9672               findTransaction(regTcPtr->transid[0], regTcPtr->transid[1],
9673                               regTcPtr->tcOprec, regTcPtr->tcHashKeyHi,
9674                               tcConnectptr) == ZNOT_FOUND);
9675     return;
9676   }
9677 
9678   prevHashptr.i = regTcPtr->prevHashRec;
9679   nextHashptr.i = regTcPtr->nextHashRec;
9680   /* prevHashptr and nextHashptr may be RNIL when the bucket has 1 element */
9681 
9682   if (prevHashptr.i != RNIL) {
9683     jamDebug();
9684     ndbrequire(tcConnect_pool.getValidPtr(prevHashptr));
9685     ndbassert(prevHashptr.p->nextHashRec == tcConnectptr.i);
9686     prevHashptr.p->nextHashRec = nextHashptr.i;
9687   } else {
9688     jamDebug();
9689 /* ------------------------------------------------------------------------- */
9690 /* THE OPERATION WAS PLACED FIRST IN THE LIST OF THE HASH TABLE. NEED TO SET */
9691 /* A NEW LEADER OF THE LIST.                                                 */
9692 /* ------------------------------------------------------------------------- */
9693     Uint32 hashIndex = regTcPtr->hashIndex;
9694     ndbassert(hashIndex == ((regTcPtr->transid[0] ^ regTcPtr->tcOprec) &
9695                              (TRANSID_HASH_SIZE - 1)));
9696     ndbassert(ctransidHash[hashIndex] == tcConnectptr.i);
9697     ctransidHash[hashIndex] = nextHashptr.i;
9698   }//if
9699   if (nextHashptr.i != RNIL) {
9700     jamDebug();
9701     ndbrequire(tcConnect_pool.getValidPtr(nextHashptr));
9702     ndbassert(nextHashptr.p->prevHashRec == tcConnectptr.i);
9703     nextHashptr.p->prevHashRec = prevHashptr.i;
9704   }//if
9705 
9706   regTcPtr->hashIndex = regTcPtr->prevHashRec = regTcPtr->nextHashRec = RNIL;
9707 }//Dblqh::deleteTransidHash()
9708 
9709 /* -------------------------------------------------------------------------
9710  * -------       RELEASE OPERATION FROM ACTIVE LIST ON FRAGMENT      -------
9711  *
9712  *       SUBROUTINE SHORT NAME = RAF
9713  * ------------------------------------------------------------------------- */
9714 /* ######################################################################### */
9715 /* #######                   TRANSACTION MODULE                      ####### */
9716 /*      THIS MODULE HANDLES THE COMMIT AND THE COMPLETE PHASE.               */
9717 /* ######################################################################### */
warningReport(Signal * signal,int place)9718 void Dblqh::warningReport(Signal* signal, int place)
9719 {
9720   switch (place) {
9721   case 0:
9722     jam();
9723 #ifdef ABORT_TRACE
9724     ndbout << "W: Received COMMIT in wrong state in Dblqh" << endl;
9725 #endif
9726     break;
9727   case 1:
9728     jam();
9729 #ifdef ABORT_TRACE
9730     ndbout << "W: Received COMMIT with wrong transid in Dblqh" << endl;
9731 #endif
9732     break;
9733   case 2:
9734     jam();
9735 #ifdef ABORT_TRACE
9736     ndbout << "W: Received COMPLETE in wrong state in Dblqh" << endl;
9737 #endif
9738     break;
9739   case 3:
9740     jam();
9741 #ifdef ABORT_TRACE
9742     ndbout << "W: Received COMPLETE with wrong transid in Dblqh" << endl;
9743 #endif
9744     break;
9745   case 4:
9746     jam();
9747 #ifdef ABORT_TRACE
9748     ndbout << "W: Received COMMITREQ in wrong state in Dblqh" << endl;
9749 #endif
9750     break;
9751   case 5:
9752     jam();
9753 #ifdef ABORT_TRACE
9754     ndbout << "W: Received COMMITREQ with wrong transid in Dblqh" << endl;
9755 #endif
9756     break;
9757   case 6:
9758     jam();
9759 #ifdef ABORT_TRACE
9760     ndbout << "W: Received COMPLETEREQ in wrong state in Dblqh" << endl;
9761 #endif
9762     break;
9763   case 7:
9764     jam();
9765 #ifdef ABORT_TRACE
9766     ndbout << "W: Received COMPLETEREQ with wrong transid in Dblqh" << endl;
9767 #endif
9768     break;
9769   case 8:
9770     jam();
9771 #ifdef ABORT_TRACE
9772     ndbout << "W: Received ABORT with non-existing transid in Dblqh" << endl;
9773 #endif
9774     break;
9775   case 9:
9776     jam();
9777 #ifdef ABORT_TRACE
9778     ndbout << "W: Received ABORTREQ with non-existing transid in Dblqh" << endl;
9779 #endif
9780     break;
9781   case 10:
9782     jam();
9783 #ifdef ABORT_TRACE
9784     ndbout << "W: Received ABORTREQ in wrong state in Dblqh" << endl;
9785 #endif
9786     break;
9787   case 11:
9788     jam();
9789 #ifdef ABORT_TRACE
9790     ndbout << "W: Received COMMIT when tc-rec released in Dblqh" << endl;
9791 #endif
9792     break;
9793   case 12:
9794     jam();
9795 #ifdef ABORT_TRACE
9796     ndbout << "W: Received COMPLETE when tc-rec released in Dblqh" << endl;
9797 #endif
9798     break;
9799   case 13:
9800     jam();
9801 #ifdef ABORT_TRACE
9802     ndbout << "W: Received LQHKEYREF when tc-rec released in Dblqh" << endl;
9803 #endif
9804     break;
9805   case 14:
9806     jam();
9807 #ifdef ABORT_TRACE
9808     ndbout << "W: Received LQHKEYREF with wrong transid in Dblqh" << endl;
9809 #endif
9810     break;
9811   case 15:
9812     jam();
9813 #ifdef ABORT_TRACE
9814     ndbout << "W: Received LQHKEYREF when already aborting in Dblqh" << endl;
9815 #endif
9816     break;
9817   case 16:
9818     jam();
9819     ndbrequire(cstartPhase == ZNIL);
9820 #ifdef ABORT_TRACE
9821     ndbout << "W: Received LQHKEYREF in wrong state in Dblqh" << endl;
9822 #endif
9823     break;
9824   default:
9825     jam();
9826 #ifdef ABORT_TRACE
9827     ndbout << "LQH WarningReport: " << place << endl;
9828 #endif
9829     break;
9830   }//switch
9831   return;
9832 }//Dblqh::warningReport()
9833 
errorReport(Signal * signal,int place)9834 void Dblqh::errorReport(Signal* signal, int place)
9835 {
9836   switch (place) {
9837   case 0:
9838     jam();
9839     break;
9840   case 1:
9841     jam();
9842     break;
9843   case 2:
9844     jam();
9845     break;
9846   case 3:
9847     jam();
9848     break;
9849   default:
9850     jam();
9851     break;
9852   }//switch
9853   systemErrorLab(signal, __LINE__);
9854   return;
9855 }//Dblqh::errorReport()
9856 
9857 void
execFIRE_TRIG_REQ(Signal * signal)9858 Dblqh::execFIRE_TRIG_REQ(Signal* signal)
9859 {
9860   Uint32 tcOprec = signal->theData[0];
9861   Uint32 transid1 = signal->theData[1];
9862   Uint32 transid2 = signal->theData[2];
9863   Uint32 pass = signal->theData[3];
9864   Uint32 senderRef = signal->getSendersBlockRef();
9865 
9866   jamEntry();
9867 
9868   if (ERROR_INSERTED_CLEAR(5064))
9869   {
9870     // throw away...should cause timeout in TC
9871     return;
9872   }
9873 
9874   CRASH_INSERTION(5072);
9875 
9876   Uint32 err;
9877   TcConnectionrecPtr tcConnectptr;
9878   if (findTransaction(transid1, transid2, tcOprec, 0, tcConnectptr) == ZOK &&
9879       !ERROR_INSERTED_CLEAR(5065) &&
9880       !ERROR_INSERTED(5070) &&
9881       !ERROR_INSERTED(5071))
9882   {
9883     TcConnectionrec * const regTcPtr = tcConnectptr.p;
9884 
9885     if (unlikely(regTcPtr->transactionState != TcConnectionrec::PREPARED ||
9886                  ERROR_INSERTED_CLEAR(5067)))
9887     {
9888       err = FireTrigRef::FTR_IncorrectState;
9889       goto do_err;
9890     }
9891 
9892     /**
9893      *
9894      */
9895     signal->theData[0] = regTcPtr->tupConnectrec;
9896     signal->theData[1] = regTcPtr->tcBlockref;
9897     signal->theData[2] = regTcPtr->tcOprec;
9898     signal->theData[3] = transid1;
9899     signal->theData[4] = transid2;
9900     signal->theData[5] = pass;
9901     Uint32 tup = refToMain(ctupBlockref);
9902     EXECUTE_DIRECT(tup, GSN_FIRE_TRIG_REQ, signal, 6);
9903 
9904     err = signal->theData[0];
9905     Uint32 cnt = signal->theData[1];
9906 
9907     if (ERROR_INSERTED_CLEAR(5066))
9908     {
9909       err = 5066;
9910     }
9911 
9912     if (ERROR_INSERTED_CLEAR(5068))
9913       tcOprec++;
9914     if (ERROR_INSERTED_CLEAR(5069))
9915       transid1++;
9916 
9917     if (err == 0)
9918     {
9919       jam();
9920       Uint32 Tdata[FireTrigConf::SignalLength];
9921       FireTrigConf * conf = CAST_PTR(FireTrigConf, Tdata);
9922       conf->tcOpRec = tcOprec;
9923       conf->transId[0] = transid1;
9924       conf->transId[1] = transid2;
9925       conf->numFiredTriggers = cnt;
9926       sendFireTrigConfTc(signal, regTcPtr->tcBlockref, Tdata);
9927       return;
9928     }
9929   }
9930   else
9931   {
9932     jam();
9933     err = FireTrigRef::FTR_UnknownOperation;
9934   }
9935 
9936 do_err:
9937   if (ERROR_INSERTED_CLEAR(5070))
9938     tcOprec++;
9939 
9940   if (ERROR_INSERTED_CLEAR(5071))
9941     transid1++;
9942 
9943   FireTrigRef * ref = CAST_PTR(FireTrigRef, signal->getDataPtrSend());
9944   ref->tcOpRec = tcOprec;
9945   ref->transId[0] = transid1;
9946   ref->transId[1] = transid2;
9947   ref->errCode = err;
9948   sendSignal(senderRef, GSN_FIRE_TRIG_REF,
9949              signal, FireTrigRef::SignalLength, JBB);
9950 
9951   return;
9952 }
9953 
9954 void
sendFireTrigConfTc(Signal * signal,BlockReference atcBlockref,Uint32 Tdata[])9955 Dblqh::sendFireTrigConfTc(Signal* signal,
9956                           BlockReference atcBlockref,
9957                           Uint32 Tdata[])
9958 {
9959   Uint32 instanceKey = refToInstance(atcBlockref);
9960 
9961   ndbassert(refToMain(atcBlockref) == DBTC);
9962   if (instanceKey > MAX_NDBMT_TC_THREADS)
9963   {
9964     jam();
9965     memcpy(signal->theData, Tdata, 4 * FireTrigConf::SignalLength);
9966     sendSignal(atcBlockref, GSN_FIRE_TRIG_CONF,
9967                signal, FireTrigConf::SignalLength, JBB);
9968     return;
9969   }
9970 
9971   HostRecordPtr Thostptr;
9972   Thostptr.i = refToNode(atcBlockref);
9973   ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
9974   Uint32 len = FireTrigConf::SignalLength;
9975   struct PackedWordsContainer * container = &Thostptr.p->tc_pack[instanceKey];
9976 
9977   if (container->noOfPackedWords > (25 - len))
9978   {
9979     jam();
9980     sendPackedSignal(signal, container);
9981   }
9982   else
9983   {
9984     jam();
9985     updatePackedList(signal, Thostptr.p, Thostptr.i);
9986   }
9987 
9988   ndbassert(FireTrigConf::SignalLength == 4);
9989   Uint32 * dst = &container->packedWords[container->noOfPackedWords];
9990   container->noOfPackedWords += len;
9991   dst[0] = Tdata[0] | (ZFIRE_TRIG_CONF << 28);
9992   dst[1] = Tdata[1];
9993   dst[2] = Tdata[2];
9994   dst[3] = Tdata[3];
9995 }
9996 
9997 bool
check_fire_trig_pass(Uint32 opId,Uint32 pass)9998 Dblqh::check_fire_trig_pass(Uint32 opId, Uint32 pass)
9999 {
10000   /**
10001    * Check that trigger only fires once per pass
10002    *   (per primary key)
10003    */
10004   TcConnectionrecPtr regTcPtr;
10005   regTcPtr.i= opId;
10006   ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
10007   if (regTcPtr.p->m_fire_trig_pass <= pass)
10008   {
10009     regTcPtr.p->m_fire_trig_pass = pass + 1;
10010     return true;
10011   }
10012   return false;
10013 }
10014 
/* ************************************************************************>>
 *  COMMIT: Start commit request from TC. This signal is originally sent as a
 *  packed signal and this function is called from execPACKED_SIGNAL.
 *  This is the normal commit protocol where TC first sends this signal to the
 *  backup node, which then sends COMMIT to the primary node. If
 *  everything is ok the primary node sends COMMITTED back to TC.
 * ************************************************************************>> */
execCOMMIT(Signal * signal)10022 void Dblqh::execCOMMIT(Signal* signal)
10023 {
10024   TcConnectionrecPtr tcConnectptr;
10025   tcConnectptr.i = signal->theData[0];
10026   Uint32 gci_hi = signal->theData[1];
10027   Uint32 transid1 = signal->theData[2];
10028   Uint32 transid2 = signal->theData[3];
10029   Uint32 gci_lo = signal->theData[4];
10030   jamEntry();
10031   if (unlikely(!tcConnect_pool.getValidPtr(tcConnectptr)))
10032   {
10033     jam();
10034     warningReport(signal, 0);
10035     return;
10036   }//if
10037   if (ERROR_INSERTED(5011)) {
10038     CLEAR_ERROR_INSERT_VALUE;
10039     sendSignalWithDelay(cownref, GSN_COMMIT, signal, 2000,signal->getLength());
10040     return;
10041   }//if
10042   if (ERROR_INSERTED(5012)) {
10043     SET_ERROR_INSERT_VALUE(5017);
10044     sendSignalWithDelay(cownref, GSN_COMMIT, signal, 2000,signal->getLength());
10045     return;
10046   }//if
10047   if (ERROR_INSERTED(5062) &&
10048       ((refToMain(signal->getSendersBlockRef()) == DBTC) ||
10049        signal->getSendersBlockRef() == reference()))
10050   {
10051     Uint32 save = signal->getSendersBlockRef();
10052     ndbout_c("Delaying execCOMMIT");
10053     sendSignalWithDelay(cownref, GSN_COMMIT, signal, 2000, signal->getLength());
10054 
10055     if (refToMain(save) == DBTC)
10056     {
10057       ndbout_c("killing %u", refToNode(save));
10058       signal->theData[0] = 9999;
10059       sendSignal(numberToRef(CMVMI, refToNode(save)),
10060                  GSN_NDB_TAMPER, signal, 1, JBB);
10061     }
10062     return;
10063   }
10064   if (likely((tcConnectptr.p->transid[0] == transid1) &&
10065              (tcConnectptr.p->transid[1] == transid2)))
10066   {
10067 
10068     TcConnectionrec * const regTcPtr = tcConnectptr.p;
10069     TRACE_OP(regTcPtr, "COMMIT");
10070 
10071     CRASH_INSERTION(5048);
10072     if (ERROR_INSERTED(5049))
10073     {
10074       SET_ERROR_INSERT_VALUE(5048);
10075     }
10076     if (ERROR_INSERTED(5093))
10077     {
10078       if (tcConnectptr.p->operation == ZREAD)
10079       {
10080         jam();
10081         CLEAR_ERROR_INSERT_VALUE;
10082         g_eventLogger->info("Delaying COMMIT for READ");
10083         sendSignalWithDelay(cownref, GSN_COMMIT, signal, 3000, 5);
10084         return;
10085       }
10086     }
10087     commitReqLab(signal, gci_hi, gci_lo, tcConnectptr);
10088     return;
10089   }//if
10090   warningReport(signal, 1);
10091   return;
10092 }//Dblqh::execCOMMIT()
10093 
10094 /* ************************************************************************>>
10095  *  COMMITREQ: Commit request from TC. This is the commit protocol used if
10096  *  one of the nodes is not behaving correctly. TC explicitly sends COMMITREQ
10097  *  to both the backup and primary node and gets a COMMITCONF back if the
10098  *  COMMIT was ok.
10099  * ************************************************************************>> */
execCOMMITREQ(Signal * signal)10100 void Dblqh::execCOMMITREQ(Signal* signal)
10101 {
10102   jamEntry();
10103   Uint32 reqPtr = signal->theData[0];
10104   BlockReference reqBlockref = signal->theData[1];
10105   Uint32 gci_hi = signal->theData[2];
10106   Uint32 transid1 = signal->theData[3];
10107   Uint32 transid2 = signal->theData[4];
10108   Uint32 tcOprec = signal->theData[6];
10109   Uint32 gci_lo = signal->theData[7];
10110 
10111   ndbrequire(signal->getLength() >= 8);
10112 
10113   if (ERROR_INSERTED(5004)) {
10114     systemErrorLab(signal, __LINE__);
10115   }
10116   if (ERROR_INSERTED(5017)) {
10117     CLEAR_ERROR_INSERT_VALUE;
10118     sendSignalWithDelay(cownref, GSN_COMMITREQ, signal, 2000,
10119                         signal->getLength());
10120     return;
10121   }//if
10122   TcConnectionrecPtr tcConnectptr;
10123   if (findTransaction(transid1,
10124                       transid2,
10125                       tcOprec, 0,
10126                       tcConnectptr) != ZOK)
10127   {
10128     warningReport(signal, 5);
10129     return;
10130   }//if
10131   TcConnectionrec * const regTcPtr = tcConnectptr.p;
10132   switch (regTcPtr->transactionState) {
10133   case TcConnectionrec::PREPARED:
10134   case TcConnectionrec::LOG_COMMIT_QUEUED_WAIT_SIGNAL:
10135   case TcConnectionrec::LOG_COMMIT_WRITTEN_WAIT_SIGNAL:
10136     jam();
10137 /*-------------------------------------------------------*/
10138 /*       THE NORMAL CASE.                                */
10139 /*-------------------------------------------------------*/
10140     regTcPtr->reqBlockref = reqBlockref;
10141     regTcPtr->reqRef = reqPtr;
10142     regTcPtr->abortState = TcConnectionrec::REQ_FROM_TC;
10143     commitReqLab(signal, gci_hi, gci_lo, tcConnectptr);
10144     return;
10145   case TcConnectionrec::COMMITTED:
10146     jam();
10147 /*---------------------------------------------------------*/
10148 /*       FOR SOME REASON THE COMMIT PHASE HAVE BEEN        */
10149 /*       FINISHED AFTER A TIME OUT. WE NEED ONLY SEND A    */
10150 /*       COMMITCONF SIGNAL.                                */
10151 /*---------------------------------------------------------*/
10152     regTcPtr->reqBlockref = reqBlockref;
10153     regTcPtr->reqRef = reqPtr;
10154     regTcPtr->abortState = TcConnectionrec::REQ_FROM_TC;
10155     signal->theData[0] = regTcPtr->reqRef;
10156     signal->theData[1] = cownNodeid;
10157     signal->theData[2] = regTcPtr->transid[0];
10158     signal->theData[3] = regTcPtr->transid[1];
10159     sendSignal(regTcPtr->reqBlockref, GSN_COMMITCONF, signal, 4, JBB);
10160     break;
10161   case TcConnectionrec::WAIT_TUP_COMMIT:
10162     jam();
10163     regTcPtr->reqBlockref = reqBlockref;
10164     regTcPtr->reqRef = reqPtr;
10165     regTcPtr->abortState = TcConnectionrec::REQ_FROM_TC;
10166     /*empty*/;
10167     return;
10168   default:
10169     jam();
10170     ndbabort();
10171     return;
10172     break;
10173   }//switch
10174   return;
10175 }//Dblqh::execCOMMITREQ()
10176 
10177 /* ************************************************************************>>
10178  *  COMPLETE : Complete the transaction. Sent as a packed signal from TC.
10179  *  Works the same way as COMMIT protocol. This is the normal case with both
10180  *  primary and backup working (See COMMIT).
10181  * ************************************************************************>> */
execCOMPLETE(Signal * signal)10182 void Dblqh::execCOMPLETE(Signal* signal)
10183 {
10184   TcConnectionrecPtr tcConnectptr;
10185   tcConnectptr.i = signal->theData[0];
10186   Uint32 transid1 = signal->theData[1];
10187   Uint32 transid2 = signal->theData[2];
10188   jamEntry();
10189   if (unlikely(!tcConnect_pool.getValidPtr(tcConnectptr)))
10190   {
10191     jam();
10192     warningReport(signal, 2);
10193     return;
10194   }//if
10195   CRASH_INSERTION(5042);
10196 
10197   if (ERROR_INSERTED(5013)) {
10198     CLEAR_ERROR_INSERT_VALUE;
10199     sendSignalWithDelay(cownref, GSN_COMPLETE, signal, 2000, 3);
10200     return;
10201   }//if
10202   if (ERROR_INSERTED(5014)) {
10203     SET_ERROR_INSERT_VALUE(5018);
10204     sendSignalWithDelay(cownref, GSN_COMPLETE, signal, 2000, 3);
10205     return;
10206   }//if
10207   if (ERROR_INSERTED(5063) &&
10208       ((refToMain(signal->getSendersBlockRef()) == DBTC) ||
10209        signal->getSendersBlockRef() == reference()))
10210   {
10211     Uint32 save = signal->getSendersBlockRef();
10212     ndbout_c("Delaying execCOMPLETE");
10213     sendSignalWithDelay(cownref, GSN_COMPLETE,signal, 2000,signal->getLength());
10214 
10215     if (refToMain(save) == DBTC)
10216     {
10217       ndbout_c("killing %u", refToNode(save));
10218       signal->theData[0] = 9999;
10219       sendSignal(numberToRef(CMVMI, refToNode(save)),
10220                  GSN_NDB_TAMPER, signal, 1, JBB);
10221     }
10222     return;
10223   }
10224   if (likely(tcConnectptr.p->transactionState == TcConnectionrec::COMMITTED) &&
10225              (tcConnectptr.p->transid[0] == transid1) &&
10226              (tcConnectptr.p->transid[1] == transid2))
10227   {
10228     TcConnectionrec * const regTcPtr = tcConnectptr.p;
10229     if (ERROR_INSERTED(5093))
10230     {
10231       if ((tcConnectptr.p->seqNoReplica != 0) &&
10232           (tcConnectptr.p->operation == ZDELETE))
10233       {
10234         jam();
10235         CLEAR_ERROR_INSERT_VALUE;
10236         g_eventLogger->info("Delaying COMPLETE for DELETE at Backup replica");
10237         sendSignalWithDelay(cownref, GSN_COMPLETE, signal, 1000, 3);
10238         return;
10239       }
10240     }
10241     TRACE_OP(regTcPtr, "COMPLETE");
10242 
10243     if (tcConnectptr.p->seqNoReplica != 0 &&
10244 	tcConnectptr.p->activeCreat == Fragrecord::AC_NORMAL) {
10245       jam();
10246       localCommitLab(signal, tcConnectptr);
10247       return;
10248     }
10249     else if (tcConnectptr.p->seqNoReplica == 0)
10250     {
10251       jam();
10252       completeTransLastLab(signal, tcConnectptr);
10253       return;
10254     }
10255     else
10256     {
10257       jam();
10258       completeTransNotLastLab(signal, tcConnectptr);
10259       return;
10260     }
10261   }//if
10262   if (tcConnectptr.p->transactionState != TcConnectionrec::COMMITTED)
10263   {
10264     jam();
10265     warningReport(signal, 2);
10266   }
10267   else
10268   {
10269     jam();
10270     warningReport(signal, 3);
10271   }//if
10272 }//Dblqh::execCOMPLETE()
10273 
10274 /* ************************************************************************>>
10275  * COMPLETEREQ: Complete request from TC. Same as COMPLETE but used if one
10276  * node is not working ok (See COMMIT).
10277  * ************************************************************************>> */
execCOMPLETEREQ(Signal * signal)10278 void Dblqh::execCOMPLETEREQ(Signal* signal)
10279 {
10280   jamEntry();
10281   Uint32 reqPtr = signal->theData[0];
10282   BlockReference reqBlockref = signal->theData[1];
10283   Uint32 transid1 = signal->theData[2];
10284   Uint32 transid2 = signal->theData[3];
10285   Uint32 tcOprec = signal->theData[5];
10286   if (ERROR_INSERTED(5005)) {
10287     systemErrorLab(signal, __LINE__);
10288   }
10289   if (ERROR_INSERTED(5018)) {
10290     CLEAR_ERROR_INSERT_VALUE;
10291     sendSignalWithDelay(cownref, GSN_COMPLETEREQ, signal, 2000, 6);
10292     return;
10293   }//if
10294   TcConnectionrecPtr tcConnectptr;
10295   if (findTransaction(transid1,
10296                       transid2,
10297                       tcOprec, 0,
10298                       tcConnectptr) != ZOK)
10299   {
10300     jam();
10301 /*---------------------------------------------------------*/
10302 /*       FOR SOME REASON THE COMPLETE PHASE STARTED AFTER  */
10303 /*       A TIME OUT. THE TRANSACTION IS GONE. WE NEED TO   */
10304 /*       REPORT COMPLETION ANYWAY.                         */
10305 /*---------------------------------------------------------*/
10306     signal->theData[0] = reqPtr;
10307     signal->theData[1] = cownNodeid;
10308     signal->theData[2] = transid1;
10309     signal->theData[3] = transid2;
10310     sendSignal(reqBlockref, GSN_COMPLETECONF, signal, 4, JBB);
10311     warningReport(signal, 7);
10312     return;
10313   }//if
10314   TcConnectionrec * const regTcPtr = tcConnectptr.p;
10315   switch (regTcPtr->transactionState) {
10316   case TcConnectionrec::COMMITTED:
10317     jam();
10318     regTcPtr->reqBlockref = reqBlockref;
10319     regTcPtr->reqRef = reqPtr;
10320     regTcPtr->abortState = TcConnectionrec::REQ_FROM_TC;
10321     /*empty*/;
10322     break;
10323 /*---------------------------------------------------------*/
10324 /*       THE NORMAL CASE.                                  */
10325 /*---------------------------------------------------------*/
10326   case TcConnectionrec::WAIT_TUP_COMMIT:
10327     jam();
10328 /*---------------------------------------------------------*/
10329 /*       FOR SOME REASON THE COMPLETE PHASE STARTED AFTER  */
10330 /*       A TIME OUT. WE HAVE SET THE PROPER VARIABLES SUCH */
10331 /*       THAT A COMPLETECONF WILL BE SENT WHEN COMPLETE IS */
10332 /*       FINISHED.                                         */
10333 /*---------------------------------------------------------*/
10334     regTcPtr->reqBlockref = reqBlockref;
10335     regTcPtr->reqRef = reqPtr;
10336     regTcPtr->abortState = TcConnectionrec::REQ_FROM_TC;
10337     return;
10338   default:
10339     jam();
10340     ndbabort();
10341     return;
10342   }//switch
10343   if (regTcPtr->seqNoReplica != 0 &&
10344       regTcPtr->activeCreat != Fragrecord::AC_NR_COPY) {
10345     /**
10346      * TODO RONM: Align this code with execCOMPLETEREQ which
10347      * handles AC_IGNORED differently. Need to handle
10348      * cnewestGci and fragPtr.p->newestGci also for those
10349      * cases properly.
10350      */
10351     jam();
10352     localCommitLab(signal, tcConnectptr);
10353   }
10354   else if (regTcPtr->seqNoReplica == 0)
10355   {
10356     jam();
10357     completeTransLastLab(signal, tcConnectptr);
10358   }
10359   else
10360   {
10361     jam();
10362     completeTransNotLastLab(signal, tcConnectptr);
10363   }
10364 }//Dblqh::execCOMPLETEREQ()
10365 
10366 /* ************> */
10367 /*  COMPLETED  > */
10368 /* ************> */
execLQHKEYCONF(Signal * signal)10369 void Dblqh::execLQHKEYCONF(Signal* signal)
10370 {
10371   LqhKeyConf * const lqhKeyConf = (LqhKeyConf *)signal->getDataPtr();
10372   TcConnectionrecPtr tcConnectptr;
10373   tcConnectptr.i = lqhKeyConf->opPtr;
10374   if (unlikely(!tcConnect_pool.getValidPtr(tcConnectptr)))
10375   {
10376     errorReport(signal, 2);
10377     return;
10378   }//if
10379   TcConnectionrec::ConnectState connectState = tcConnectptr.p->connectState;
10380   jamEntry();
10381   switch (connectState) {
10382   case TcConnectionrec::LOG_CONNECTED:
10383     jam();
10384     completedLab(signal, tcConnectptr);
10385     return;
10386   case TcConnectionrec::COPY_CONNECTED:
10387     jam();
10388     if (ERROR_INSERTED(5106) &&
10389         signal->getSendersBlockRef() != reference())
10390     {
10391       g_eventLogger->info("LQH %u delaying copy LQHKEYCONF", instance());
10392       sendSignalWithDelay(reference(),
10393                           GSN_LQHKEYCONF,
10394                           signal,
10395                           500,
10396                           7);
10397       return;
10398     }
10399     setup_scan_pointers_from_tc_con(tcConnectptr);
10400     copyCompletedLab(signal, tcConnectptr);
10401     return;
10402   default:
10403     jamLine(tcConnectptr.p->connectState);
10404     ndbabort();
10405   }//switch
10406   return;
10407 }//Dblqh::execLQHKEYCONF()
10408 
10409 /* ------------------------------------------------------------------------- */
10410 /* -------                       COMMIT PHASE                        ------- */
10411 /*                                                                           */
10412 /* ------------------------------------------------------------------------- */
/**
 * Common commit entry point, called from execCOMMIT and execCOMMITREQ.
 *
 * Stores the global checkpoint id (gci_hi/gci_lo) in the operation record
 * and then acts on the operation's transaction state and log write state:
 *  - PREPARED: depending on logWriteState, request a commit log write via
 *    TUP (GSN_TUP_WRITELOG_REQ), or skip the log, or call
 *    checkScanTcCompleted().
 *  - LOG_COMMIT_QUEUED_WAIT_SIGNAL: move to LOG_COMMIT_QUEUED and return.
 *  - LOG_COMMIT_WRITTEN_WAIT_SIGNAL: continue directly.
 *  - anything else: report a warning and return.
 * When it continues, it either commits locally (localCommitLab) or only
 * replies (commitReplyLab).
 *
 * @param signal        signal object, reused for outgoing signals
 * @param gci_hi        high word of the global checkpoint id to commit in
 * @param gci_lo        low word of the global checkpoint id to commit in
 * @param tcConnectptr  operation record being committed
 */
void Dblqh::commitReqLab(Signal* signal,
                         Uint32 gci_hi,
                         Uint32 gci_lo,
                         TcConnectionrecPtr tcConnectptr)
{
  TcConnectionrec * const regTcPtr = tcConnectptr.p;
  /* Snapshot both states before they are modified below. */
  TcConnectionrec::LogWriteState logWriteState = regTcPtr->logWriteState;
  TcConnectionrec::TransactionState transState = regTcPtr->transactionState;
  regTcPtr->gci_hi = gci_hi;
  regTcPtr->gci_lo = gci_lo;
  /**
   * TODO RONM: Ensure that cnewestGci and fragPtr.p->newestGci are kept in
   * synch in all possible node restart variants. Currently it isn't updated
   * when AC_IGNORED set sometimes and sometimes not. How can we ensure that
   * a starting node gets the proper setting of those variables after a copy
   * phase have been completed.
   */
  if (transState == TcConnectionrec::PREPARED)
  {
    if (logWriteState == TcConnectionrec::WRITTEN)
    {
      jam();
      regTcPtr->transactionState = TcConnectionrec::PREPARED_RECEIVED_COMMIT;
      TcConnectionrecPtr saveTcPtr = tcConnectptr;
      Uint32 blockNo = refToMain(ctupBlockref);
      signal->theData[0] = regTcPtr->tupConnectrec;
      signal->theData[1] = gci_hi;
      signal->theData[2] = gci_lo;
      /* Direct call into the TUP block. The transaction state is expected
       * to have changed when it returns (presumably via
       * execLQH_WRITELOG_REQ - confirm against the TUP block). */
      EXECUTE_DIRECT(blockNo, GSN_TUP_WRITELOG_REQ, signal, 3);
      jamEntry();
      if (regTcPtr->transactionState == TcConnectionrec::LOG_COMMIT_QUEUED) {
        jam();
        /* The commit log write was queued; stop here. */
        return;
      }//if
      ndbrequire(regTcPtr->transactionState ==
                 TcConnectionrec::LOG_COMMIT_WRITTEN);
      tcConnectptr = saveTcPtr;
    } else if (logWriteState == TcConnectionrec::NOT_STARTED) {
      jam();
    } else if (logWriteState == TcConnectionrec::NOT_WRITTEN) {
      jam();
/*---------------------------------------------------------------------------*/
/* IT IS A READ OPERATION OR OTHER OPERATION THAT DO NOT USE THE LOG.        */
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/* THE LOG HAS NOT BEEN WRITTEN SINCE THE LOG FLAG WAS FALSE. THIS CAN OCCUR */
/* WHEN WE ARE STARTING A NEW FRAGMENT.                                      */
/*---------------------------------------------------------------------------*/
      regTcPtr->logWriteState = TcConnectionrec::NOT_STARTED;
    } else {
      ndbrequire(logWriteState == TcConnectionrec::NOT_WRITTEN_WAIT);
      jam();
/*---------------------------------------------------------------------------*/
/* THE STATE WAS SET TO NOT_WRITTEN BY THE OPERATION BUT LATER A SCAN OF ALL */
/* OPERATION RECORD CHANGED IT INTO NOT_WRITTEN_WAIT. THIS INDICATES THAT WE */
/* ARE WAITING FOR THIS OPERATION TO COMMIT OR ABORT SO THAT WE CAN FIND THE */
/* STARTING GLOBAL CHECKPOINT OF THIS NEW FRAGMENT.                          */
/*---------------------------------------------------------------------------*/
      checkScanTcCompleted(signal, tcConnectptr);
    }//if
  } else if (transState == TcConnectionrec::LOG_COMMIT_QUEUED_WAIT_SIGNAL) {
    jam();
    /* The log write is still queued; record that the commit signal has
     * arrived and return. */
    regTcPtr->transactionState = TcConnectionrec::LOG_COMMIT_QUEUED;
    return;
  } else if (transState == TcConnectionrec::LOG_COMMIT_WRITTEN_WAIT_SIGNAL) {
    jam();
  } else {
    warningReport(signal, 0);
    return;
  }//if
  /* Replicas with seqNoReplica == 0, and operations on a fragment in
   * AC_NR_COPY, commit locally here; all others only send the reply. */
  if (regTcPtr->seqNoReplica == 0 ||
      regTcPtr->activeCreat == Fragrecord::AC_NR_COPY) {
    jam();
    localCommitLab(signal, tcConnectptr);
    return;
  }//if
  commitReplyLab(signal, tcConnectptr.p);
  return;
}//Dblqh::commitReqLab()
10492 
/**
 * LQH_WRITELOG_REQ: request to write the commit log record for an operation.
 *
 * Signal words: [0] operation record index, [1] gci_hi, [2] gci_lo.
 * Stores the GCI in the operation record and raises cnewestGci if gci_hi is
 * newer. If the operation wrote to the log (logWriteState == WRITTEN), the
 * commit log record is either written immediately or queued on the log
 * part, and transactionState is advanced accordingly:
 *   PREPARED                 -> LOG_COMMIT_{QUEUED,WRITTEN}_WAIT_SIGNAL
 *   PREPARED_RECEIVED_COMMIT -> LOG_COMMIT_{QUEUED,WRITTEN}
 * Any other incoming transaction state fails the ndbrequire.
 */
void Dblqh::execLQH_WRITELOG_REQ(Signal* signal)
{
  jamEntry();
  TcConnectionrecPtr tcConnectptr;
  tcConnectptr.i = signal->theData[0];
  ndbrequire(tcConnect_pool.getUncheckedPtrRW(tcConnectptr));
  TcConnectionrec * const regTcPtr = tcConnectptr.p;
  Uint32 gci_hi = signal->theData[1];
  Uint32 gci_lo = signal->theData[2];
  Uint32 newestGci = cnewestGci;
  /* Snapshot both states before transactionState is modified below. */
  TcConnectionrec::LogWriteState logWriteState = regTcPtr->logWriteState;
  TcConnectionrec::TransactionState transState = regTcPtr->transactionState;
  ndbrequire(Magic::check_ptr(regTcPtr));
  regTcPtr->gci_hi = gci_hi;
  regTcPtr->gci_lo = gci_lo;
  if (gci_hi > newestGci) {
    jam();
/* ------------------------------------------------------------------------- */
/*       KEEP TRACK OF NEWEST GLOBAL CHECKPOINT THAT LQH HAS HEARD OF.       */
/* ------------------------------------------------------------------------- */
    cnewestGci = gci_hi;
  }//if
  if (logWriteState == TcConnectionrec::WRITTEN) {
/*---------------------------------------------------------------------------*/
/* I NEED TO INSERT A COMMIT LOG RECORD SINCE WE ARE WRITING LOG IN THIS     */
/* TRANSACTION.                                                              */
/*---------------------------------------------------------------------------*/
    jam();
    LogPartRecordPtr regLogPartPtr;
    Uint32 noOfLogPages = cnoOfLogPages;
    jam();
    regLogPartPtr.i = regTcPtr->m_log_part_ptr_i;
    ptrCheckGuard(regLogPartPtr, clogPartFileSize, logPartRecord);
    /* Queue the commit record if other commit records are already queued on
     * this log part, or if cnoOfLogPages is zero. */
    if (!regLogPartPtr.p->m_log_complete_queue.isEmpty() ||
        (noOfLogPages == 0))
    {
      jam();
/*---------------------------------------------------------------------------*/
/* THIS LOG PART WAS CURRENTLY ACTIVE WRITING ANOTHER LOG RECORD. WE MUST    */
/* WAIT UNTIL THIS PART HAS COMPLETED ITS OPERATION.                         */
/*---------------------------------------------------------------------------*/
// We must delay the write of commit info to the log to safe-guard against
// a crash due to lack of log pages. We temporary stop all log writes to this
// log part to ensure that we don't get a buffer explosion in the delayed
// signal buffer instead.
/*---------------------------------------------------------------------------*/
      linkWaitLog(signal,
                  regLogPartPtr,
                  regLogPartPtr.p->m_log_complete_queue,
                  tcConnectptr);
      if (transState == TcConnectionrec::PREPARED) {
        jam();
        /* The commit signal has not arrived yet. */
        regTcPtr->transactionState =
          TcConnectionrec::LOG_COMMIT_QUEUED_WAIT_SIGNAL;
      } else {
        jam();
        ndbrequire(transState == TcConnectionrec::PREPARED_RECEIVED_COMMIT);
        regTcPtr->transactionState = TcConnectionrec::LOG_COMMIT_QUEUED;
      }//if
      return;
    }//if
    /* Nothing queued ahead of us: write the commit log record right away. */
    writeCommitLog(signal, regLogPartPtr, tcConnectptr.p);
    if (transState == TcConnectionrec::PREPARED) {
      jam();
      /* The commit signal has not arrived yet. */
      regTcPtr->transactionState =
        TcConnectionrec::LOG_COMMIT_WRITTEN_WAIT_SIGNAL;
    } else {
      jam();
      ndbrequire(transState == TcConnectionrec::PREPARED_RECEIVED_COMMIT);
      regTcPtr->transactionState = TcConnectionrec::LOG_COMMIT_WRITTEN;
    }//if
  }//if
}//Dblqh::execLQH_WRITELOG_REQ()
10566 
localCommitLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)10567 void Dblqh::localCommitLab(Signal* signal,
10568                            const TcConnectionrecPtr tcConnectptr)
10569 {
10570   FragrecordPtr regFragptr;
10571   regFragptr.i = tcConnectptr.p->fragmentptr;
10572   c_fragment_pool.getPtr(regFragptr);
10573   Fragrecord::FragStatus status = regFragptr.p->fragStatus;
10574   fragptr = regFragptr;
10575   switch (status) {
10576   case Fragrecord::FSACTIVE:
10577   case Fragrecord::CRASH_RECOVERING:
10578   case Fragrecord::ACTIVE_CREATION:
10579     commitContinueAfterBlockedLab(signal, tcConnectptr);
10580     return;
10581   case Fragrecord::FREE:
10582     ndbabort();
10583   case Fragrecord::DEFINED:
10584     ndbabort();
10585   case Fragrecord::REMOVING:
10586     ndbabort();
10587   default:
10588     ndbabort();
10589   }//switch
10590 }//Dblqh::localCommitLab()
10591 
/**
 * Perform the local commit of one operation in TUP and ACC.
 *
 * Unless the operation's activeCreat is AC_IGNORED:
 *  - non-read operations are committed in TUP first (GSN_TUP_COMMITREQ) and
 *    then in ACC; if TUP reports a non-zero signal->theData[0] the operation
 *    is parked in WAIT_TUP_COMMIT and this function returns early.
 *  - read operations are committed in ACC only when not dirty; dirty reads
 *    without the normal-protocol flag are cleaned up immediately, and
 *    simple reads go on to packLqhkeyreqLab().
 * All remaining cases finish through tupcommit_conf().
 *
 * @param signal        signal object, reused for the direct calls
 * @param tcConnectptr  operation record being committed
 */
void Dblqh::commitContinueAfterBlockedLab(
                Signal* signal,
                const TcConnectionrecPtr tcConnectptr)
{
/* ------------------------------------------------------------------------- */
/*INPUT:          TC_CONNECTPTR           ACTIVE OPERATION RECORD            */
/* ------------------------------------------------------------------------- */
/* ------------------------------------------------------------------------- */
/*CONTINUE HERE AFTER BEING BLOCKED FOR A WHILE DURING LOCAL CHECKPOINT.     */
/*The operation is already removed from the active list since there is no    */
/*chance for any real-time breaks before we need to release it.              */
/* ------------------------------------------------------------------------- */
/*ALSO AFTER NORMAL PROCEDURE WE CONTINUE                                    */
/*WE MUST COMMIT TUP BEFORE ACC TO ENSURE THAT NO ONE RACES IN AND SEES A    */
/*DIRTY STATE IN TUP.                                                        */
/* ------------------------------------------------------------------------- */
  Ptr<TcConnectionrec> regTcPtr = tcConnectptr;
  /* Keep a private copy of the block-level fragptr; it is written back to
   * fragptr before each exit that goes on to other functions. */
  Ptr<Fragrecord> regFragptr = fragptr;
  Uint32 operation = regTcPtr.p->operation;
  Uint32 dirtyOp = regTcPtr.p->dirtyOp;
  Uint32 opSimple = regTcPtr.p->opSimple;
  bool normalProtocol = (regTcPtr.p->m_flags &
                         TcConnectionrec::OP_NORMAL_PROTOCOL);

  if (regTcPtr.p->activeCreat != Fragrecord::AC_IGNORED)
  {
    if (operation != ZREAD)
    {

      prefetch_op_record_3((Uint32*)regTcPtr.p->accConnectPtrP);

      /* Build the TUP_COMMITREQ in the signal buffer and execute it
       * directly in the TUP block. */
      TupCommitReq * const tupCommitReq =
        (TupCommitReq *)signal->getDataPtrSend();
      Uint32 sig0 = regTcPtr.p->tupConnectrec;
      Uint32 tup = refToMain(ctupBlockref);
      jam();
      tupCommitReq->opPtr = sig0;
      tupCommitReq->gci_hi = regTcPtr.p->gci_hi;
      tupCommitReq->hashValue = regTcPtr.p->hashValue;
      tupCommitReq->diskpage = RNIL;
      tupCommitReq->gci_lo = regTcPtr.p->gci_lo;
      tupCommitReq->transId1 = regTcPtr.p->transid[0];
      tupCommitReq->transId2 = regTcPtr.p->transid[1];
      EXECUTE_DIRECT(tup, GSN_TUP_COMMITREQ, signal,
		     TupCommitReq::SignalLength);

      /* Optional node-restart trace output describing the committed op. */
      if (TRACENR_FLAG)
      {
	TRACENR("COMMIT: ");
	switch (regTcPtr.p->operation) {
	case ZREAD: TRACENR("READ"); break;
	case ZUPDATE: TRACENR("UPDATE"); break;
	case ZWRITE: TRACENR("WRITE"); break;
	case ZINSERT: TRACENR("INSERT"); break;
	case ZDELETE: TRACENR("DELETE"); break;
        case ZUNLOCK: TRACENR("UNLOCK"); break;
	}

	TRACENR(" tab: " << regTcPtr.p->tableref
	       << " frag: " << regTcPtr.p->fragmentid
	       << " activeCreat: " << (Uint32)regTcPtr.p->activeCreat);
	if (LqhKeyReq::getNrCopyFlag(regTcPtr.p->reqinfo))
	  TRACENR(" NrCopy");
	if (LqhKeyReq::getRowidFlag(regTcPtr.p->reqinfo))
	  TRACENR(" rowid: " << regTcPtr.p->m_row_id);
	TRACENR(" key: " << getKeyInfoWordOrZero(regTcPtr.p, 0));

        if (signal->theData[0] != 0)
          TRACENR(" TIMESLICE");
	TRACENR(endl);
      }

      /* A non-zero first word after the direct call means TUP did not
       * finish the commit; the commit is resumed later (presumably via
       * tupcommit_conf_callback, which requires WAIT_TUP_COMMIT). */
      if(signal->theData[0] != 0)
      {
        regTcPtr.p->transactionState = TcConnectionrec::WAIT_TUP_COMMIT;
        return; // TUP_COMMIT was timesliced
      }

      TRACE_OP(regTcPtr.p, "ACC_COMMITREQ");

      c_acc->execACC_COMMITREQ(signal,
                               regTcPtr.p->accConnectrec,
                               regTcPtr.p->accConnectPtrP);

    } else {
      /* Read operation: nothing to commit in TUP. Only non-dirty reads are
       * committed in ACC. */
      if(!dirtyOp){
	TRACE_OP(regTcPtr.p, "ACC_COMMITREQ");
        c_acc->execACC_COMMITREQ(signal,
                                 regTcPtr.p->accConnectrec,
                                 regTcPtr.p->accConnectPtrP);
      }

      if (dirtyOp && normalProtocol == 0)
      {
	jamDebug();
        /**
         * The dirtyRead does not send anything but TRANSID_AI from LDM
         */
	fragptr = regFragptr;
	cleanUp(signal, regTcPtr);
	return;
      }

      /**
       * The simpleRead will send a LQHKEYCONF
       *   but have already released the locks
       */
      if (opSimple)
      {
	fragptr = regFragptr;
        packLqhkeyreqLab(signal, regTcPtr);
        return;
      }
    }
  }//if
  jamEntry();
  fragptr = regFragptr;
  tupcommit_conf(signal, regTcPtr, regFragptr.p);
}
10711 
10712 void
tupcommit_conf_callback(Signal * signal,Uint32 tcPtrI)10713 Dblqh::tupcommit_conf_callback(Signal* signal, Uint32 tcPtrI)
10714 {
10715   jamEntry();
10716 
10717   TcConnectionrecPtr tcConnectptr;
10718   tcConnectptr.i = tcPtrI;
10719   ndbrequire(tcConnect_pool.getUncheckedPtrRW(tcConnectptr));
10720   TcConnectionrec * tcPtr = tcConnectptr.p;
10721 
10722   ndbrequire(tcPtr->transactionState == TcConnectionrec::WAIT_TUP_COMMIT);
10723   prefetch_op_record_3((Uint32*)tcPtr->accConnectPtrP);
10724 
10725   FragrecordPtr regFragptr;
10726   regFragptr.i = tcPtr->fragmentptr;
10727   c_fragment_pool.getPtr(regFragptr);
10728   fragptr = regFragptr;
10729 
10730   TRACE_OP(tcPtr, "ACC_COMMITREQ");
10731 
10732   ndbrequire(Magic::check_ptr(tcPtr));
10733   c_acc->execACC_COMMITREQ(signal,
10734                            tcPtr->accConnectrec,
10735                            tcPtr->accConnectPtrP);
10736   jamEntry();
10737 
10738   tcConnectptr.i = tcPtrI;
10739   tcConnectptr.p = tcPtr;
10740   tupcommit_conf(signal, tcConnectptr, regFragptr.p);
10741 }
10742 
10743 void
tupcommit_conf(Signal * signal,const TcConnectionrecPtr tcConnectptr,Fragrecord * regFragptr)10744 Dblqh::tupcommit_conf(Signal* signal,
10745 		      const TcConnectionrecPtr tcConnectptr,
10746 		      Fragrecord * regFragptr)
10747 {
10748   const TcConnectionrec* tcPtrP = tcConnectptr.p;
10749   Uint32 dirtyOp = tcPtrP->dirtyOp;
10750   Uint32 seqNoReplica = tcPtrP->seqNoReplica;
10751   Uint32 activeCreat = tcPtrP->activeCreat;
10752   if (tcPtrP->gci_hi > regFragptr->newestGci &&
10753       tcPtrP->operation != ZREAD &&
10754       tcPtrP->operation != ZREAD_EX)
10755   {
10756     jam();
10757 /* ------------------------------------------------------------------------- */
10758 /*IT IS THE FIRST TIME THIS GLOBAL CHECKPOINT IS INVOLVED IN UPDATING THIS   */
10759 /*FRAGMENT. UPDATE THE VARIABLE THAT KEEPS TRACK OF NEWEST GCI IN FRAGMENT   */
10760 /* ------------------------------------------------------------------------- */
10761     ndbassert(tcPtrP->operation != ZUNLOCK);
10762     regFragptr->newestGci = tcPtrP->gci_hi;
10763     DEB_EXTRA_LCP(("(%u)op_type: %u, newestGci: %u, tableId: %u, fragId: %u",
10764              instance(),
10765              tcPtrP->operation,
10766              regFragptr->newestGci,
10767              regFragptr->tabRef,
10768              regFragptr->fragId));
10769   }//if
10770   if (dirtyOp != ZTRUE)
10771   {
10772     if (seqNoReplica == 0 || activeCreat == Fragrecord::AC_NR_COPY)
10773     {
10774       jam();
10775       commitReplyLab(signal, tcConnectptr.p);
10776       return;
10777     }//if
10778     if (seqNoReplica == 0)
10779     {
10780       jam();
10781       completeTransLastLab(signal, tcConnectptr);
10782     }
10783     else
10784     {
10785       jam();
10786       completeTransNotLastLab(signal, tcConnectptr);
10787     }
10788     return;
10789   } else {
10790 /* ------------------------------------------------------------------------- */
10791 /*WE MUST HANDLE DIRTY WRITES IN A SPECIAL WAY. THESE OPERATIONS WILL NOT    */
10792 /*SEND ANY COMMIT OR COMPLETE MESSAGES TO OTHER NODES. THEY WILL MERELY SEND */
10793 /*THOSE SIGNALS INTERNALLY.                                                  */
10794 /* ------------------------------------------------------------------------- */
10795     if (tcPtrP->abortState == TcConnectionrec::ABORT_IDLE)
10796     {
10797       jam();
10798       if (activeCreat == Fragrecord::AC_NR_COPY)
10799       {
10800 	jam();
10801 	ndbrequire(LqhKeyReq::getNrCopyFlag(tcPtrP->reqinfo));
10802 	ndbrequire(tcPtrP->m_nr_delete.m_cnt == 0);
10803       }
10804       packLqhkeyreqLab(signal, tcConnectptr);
10805     }
10806     else
10807     {
10808       ndbrequire(tcPtrP->abortState != TcConnectionrec::NEW_FROM_TC);
10809       jam();
10810       sendLqhTransconf(signal, LqhTransConf::Committed, tcConnectptr);
10811       cleanUp(signal, tcConnectptr);
10812     }//if
10813   }//if
10814 }//Dblqh::commitContinueAfterBlockedLab()
10815 
commitReplyLab(Signal * signal,TcConnectionrec * const regTcPtr)10816 void Dblqh::commitReplyLab(Signal* signal,
10817                            TcConnectionrec* const regTcPtr)
10818 {
10819 /* -------------------------------------------------------------- */
10820 /* BACKUP AND STAND-BY REPLICAS ONLY UPDATE THE TRANSACTION STATE */
10821 /* -------------------------------------------------------------- */
10822   TcConnectionrec::AbortState abortState = regTcPtr->abortState;
10823   regTcPtr->transactionState = TcConnectionrec::COMMITTED;
10824   if (abortState == TcConnectionrec::ABORT_IDLE) {
10825     Uint32 clientBlockref = regTcPtr->clientBlockref;
10826     if (regTcPtr->seqNoReplica == 0) {
10827       jam();
10828       sendCommittedTc(signal, clientBlockref, regTcPtr);
10829       return;
10830     } else {
10831       jam();
10832       sendCommitLqh(signal, clientBlockref, regTcPtr);
10833       return;
10834     }//if
10835   } else if (regTcPtr->abortState == TcConnectionrec::REQ_FROM_TC) {
10836     jam();
10837     signal->theData[0] = regTcPtr->reqRef;
10838     signal->theData[1] = cownNodeid;
10839     signal->theData[2] = regTcPtr->transid[0];
10840     signal->theData[3] = regTcPtr->transid[1];
10841     sendSignal(regTcPtr->reqBlockref, GSN_COMMITCONF, signal, 4, JBB);
10842   } else {
10843     ndbrequire(regTcPtr->abortState == TcConnectionrec::NEW_FROM_TC);
10844   }//if
10845   return;
10846 }//Dblqh::commitReplyLab()
10847 
10848 /* ------------------------------------------------------------------------- */
10849 /* -------                COMPLETE PHASE                             ------- */
10850 /*                                                                           */
10851 /* ------------------------------------------------------------------------- */
completeTransNotLastLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)10852 void Dblqh::completeTransNotLastLab(Signal* signal,
10853                                     const TcConnectionrecPtr tcConnectptr)
10854 {
10855   TcConnectionrec * const regTcPtr = tcConnectptr.p;
10856   if (regTcPtr->abortState == TcConnectionrec::ABORT_IDLE) {
10857     Uint32 clientBlockref = regTcPtr->clientBlockref;
10858     jam();
10859     sendCompleteLqh(signal, clientBlockref, regTcPtr);
10860     cleanUp(signal, tcConnectptr);
10861     return;
10862   } else {
10863     jam();
10864     completeUnusualLab(signal, tcConnectptr);
10865     return;
10866   }//if
10867 }//Dblqh::completeTransNotLastLab()
10868 
completeTransLastLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)10869 void Dblqh::completeTransLastLab(Signal* signal,
10870                                  const TcConnectionrecPtr tcConnectptr)
10871 {
10872   TcConnectionrec * const regTcPtr = tcConnectptr.p;
10873   if (regTcPtr->abortState == TcConnectionrec::ABORT_IDLE) {
10874     Uint32 clientBlockref = regTcPtr->clientBlockref;
10875     jam();
10876 /* ------------------------------------------------------------------------- */
10877 /*DIRTY WRITES WHICH ARE LAST IN THE CHAIN OF REPLICAS WILL SEND COMPLETED   */
10878 /*INSTEAD OF SENDING PREPARED TO THE TC (OR OTHER INITIATOR OF OPERATION).   */
10879 /* ------------------------------------------------------------------------- */
10880     sendCompletedTc(signal, clientBlockref, regTcPtr);
10881     cleanUp(signal, tcConnectptr);
10882     return;
10883   } else {
10884     jam();
10885     completeUnusualLab(signal, tcConnectptr);
10886     return;
10887   }//if
10888 }//Dblqh::completeTransLastLab()
10889 
completeUnusualLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)10890 void Dblqh::completeUnusualLab(Signal* signal,
10891                                const TcConnectionrecPtr tcConnectptr)
10892 {
10893   TcConnectionrec * const regTcPtr = tcConnectptr.p;
10894   if (regTcPtr->abortState == TcConnectionrec::ABORT_FROM_TC) {
10895     jam();
10896     sendAborted(signal, tcConnectptr);
10897   } else if (regTcPtr->abortState == TcConnectionrec::NEW_FROM_TC) {
10898     jam();
10899   } else {
10900     ndbrequire(regTcPtr->abortState == TcConnectionrec::REQ_FROM_TC);
10901     jam();
10902     signal->theData[0] = regTcPtr->reqRef;
10903     signal->theData[1] = cownNodeid;
10904     signal->theData[2] = regTcPtr->transid[0];
10905     signal->theData[3] = regTcPtr->transid[1];
10906     sendSignal(regTcPtr->reqBlockref,
10907                GSN_COMPLETECONF, signal, 4, JBB);
10908   }//if
10909   cleanUp(signal, tcConnectptr);
10910   return;
10911 }//Dblqh::completeUnusualLab()
10912 
10913 /* ========================================================================= */
10914 /* =======                        RELEASE TC CONNECT RECORD          ======= */
10915 /*                                                                           */
10916 /*       RELEASE A TC CONNECT RECORD TO THE FREELIST.                        */
10917 /* ========================================================================= */
releaseTcrec(Signal * signal,TcConnectionrecPtr locTcConnectptr)10918 void Dblqh::releaseTcrec(Signal* signal, TcConnectionrecPtr locTcConnectptr)
10919 {
10920   jamDebug();
10921   if (unlikely(locTcConnectptr.p->m_dealloc_state ==
10922                TcConnectionrec::DA_DEALLOC_COUNT_ZOMBIE))
10923   {
10924     jam();
10925     /**
10926      * Need to keep this Tcrec around a little extra time to track
10927      * the ref count on TUP storage deallocation
10928      */
10929     return;
10930   }
10931 
10932   ndbassert(locTcConnectptr.p->hashIndex==RNIL);
10933   const Uint32 op = locTcConnectptr.p->operation;
10934 
10935   ndbassert(locTcConnectptr.p->tcScanRec == RNIL);
10936   ndbassert(locTcConnectptr.p->m_committed_log_space == 0);
10937 
10938   TablerecPtr tabPtr;
10939   tabPtr.i = locTcConnectptr.p->tableref;
10940   if(likely(tabPtr.i != RNIL))
10941   {
10942     ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
10943 
10944     /**
10945      * Normal case
10946      */
10947     if (op == ZREAD || op == ZUNLOCK)
10948     {
10949       ndbrequire(tabPtr.p->usageCountR > 0);
10950       tabPtr.p->usageCountR--;
10951     }
10952     else
10953     {
10954       ndbrequire(tabPtr.p->usageCountW > 0);
10955       tabPtr.p->usageCountW--;
10956     }
10957   }
10958   if (likely(locTcConnectptr.i < ctcConnectReserved))
10959   {
10960     jamDebug();
10961     const Uint32 firstFree = cfirstfreeTcConrec;
10962     Uint32 numFree = ctcNumFree;
10963     locTcConnectptr.p->tcTimer = 0;
10964     locTcConnectptr.p->transactionState = TcConnectionrec::TC_NOT_CONNECTED;
10965     locTcConnectptr.p->nextTcConnectrec = firstFree;
10966     cfirstfreeTcConrec = locTcConnectptr.i;
10967     ctcNumFree = numFree + 1;
10968   }
10969   else
10970   {
10971     jam();
10972     release_op_rec(locTcConnectptr);
10973   }
10974 }//Dblqh::releaseTcrec()
10975 
releaseTcrecLog(Signal * signal,TcConnectionrecPtr locTcConnectptr)10976 void Dblqh::releaseTcrecLog(Signal* signal, TcConnectionrecPtr locTcConnectptr)
10977 {
10978   jamDebug();
10979   Uint32 numFree = ctcNumFree;
10980   ndbassert(locTcConnectptr.p->hashIndex==RNIL);
10981   locTcConnectptr.p->tcTimer = 0;
10982   locTcConnectptr.p->transactionState = TcConnectionrec::TC_NOT_CONNECTED;
10983   locTcConnectptr.p->nextTcConnectrec = cfirstfreeTcConrec;
10984   cfirstfreeTcConrec = locTcConnectptr.i;
10985   ctcNumFree = numFree + 1;
10986 }//Dblqh::releaseTcrecLog()
10987 
10988 /* ------------------------------------------------------------------------- */
10989 /* -------                       ABORT PHASE                         ------- */
10990 /*                                                                           */
10991 /*THIS PART IS USED AT ERRORS THAT CAUSE ABORT OF TRANSACTION.               */
10992 /* ------------------------------------------------------------------------- */
10993 void
remove_commit_marker(TcConnectionrec * const regTcPtr)10994 Dblqh::remove_commit_marker(TcConnectionrec * const regTcPtr)
10995 {
10996   Ptr<CommitAckMarker> tmp;
10997   Uint32 commitAckMarker = regTcPtr->commitAckMarker;
10998   regTcPtr->commitAckMarker = RNIL;
10999   if (commitAckMarker == RNIL)
11000     return;
11001   jam();
11002   tmp.i = commitAckMarker;
11003   ndbrequire(m_commitAckMarkerPool.getValidPtr(tmp));
11004 #ifdef MARKER_TRACE
11005   ndbout_c("%u remove marker[%.8x %.8x] op: %u ref: %u",
11006            instance(), tmp.p->transid1, tmp.p->transid2,
11007            Uint32(regTcPtr - tcConnectionrec), tmp.p->reference_count);
11008 #endif
11009   if (tmp.p->in_hash == false)
11010   {
11011     ndbout_c("%u remove_commit_marker failed[%.8x %.8x]"
11012              " removed_by_fail_api = %u"
11013              " ack marker transid[%.8x %.8x]"
11014              " ack marker ref count = %d",
11015              instance(),
11016              regTcPtr->transid[0],
11017              regTcPtr->transid[1],
11018              tmp.p->removed_by_fail_api,
11019              tmp.p->transid1,
11020              tmp.p->transid2,
11021              tmp.p->reference_count);
11022     ndbrequire(tmp.p->reference_count == 0);
11023     ndbabort();
11024     return;
11025   }
11026   ndbrequire(tmp.p->reference_count > 0);
11027   if (regTcPtr->transid[0] != tmp.p->transid1 ||
11028       regTcPtr->transid[1] != tmp.p->transid2)
11029   {
11030     /**
11031      * We refer to a commit ack marker that have already been removed
11032      * and even reused by another transaction.
11033      */
11034     ndbout_c("%u remove_commit_marker failed, moved[%.8x %.8x]"
11035              " removed_by_fail_api = %u"
11036              " ack marker transid[%.8x %.8x]"
11037              " ack marker ref count = %d",
11038              instance(),
11039              regTcPtr->transid[0],
11040              regTcPtr->transid[1],
11041              tmp.p->removed_by_fail_api,
11042              tmp.p->transid1,
11043              tmp.p->transid2,
11044              tmp.p->reference_count);
11045     ndbabort();
11046     return;
11047   }
11048   tmp.p->reference_count--;
11049   if (tmp.p->reference_count == 0)
11050   {
11051     jam();
11052     CommitAckMarker key;
11053     key.transid1 = regTcPtr->transid[0];
11054     key.transid2 = regTcPtr->transid[1];
11055     CommitAckMarkerPtr removedPtr;
11056     m_commitAckMarkerHash.remove(removedPtr, key);
11057     ndbrequire(removedPtr.i != RNIL);
11058     ndbrequire(removedPtr.i == tmp.i);
11059     removedPtr.p->in_hash = false;
11060     m_commitAckMarkerPool.release(removedPtr);
11061     checkPoolShrinkNeed(DBLQH_COMMIT_ACK_MARKER_TRANSIENT_POOL_INDEX,
11062                         m_commitAckMarkerPool);
11063   }
11064 }
11065 
11066 /* ***************************************************>> */
11067 /*  ABORT: Abort transaction in connection. Sender TC.   */
11068 /*  This is the normal protocol (See COMMIT)             */
11069 /* ***************************************************>> */
execABORT(Signal * signal)11070 void Dblqh::execABORT(Signal* signal)
11071 {
11072   jamEntry();
11073   if (ERROR_INSERTED(5096))
11074   {
11075     jam();
11076     g_eventLogger->info("LQH %u : ERRINS 5096 Stalling ABORT",
11077                         instance());
11078     sendSignalWithDelay(reference(), GSN_ABORT, signal, 10, 4);
11079     return;
11080   }
11081   if (ERROR_INSERTED(5095))
11082   {
11083     jam();
11084     g_eventLogger->info("LQH %u : ERRINS 5095 Passing abort and setting ERRINS 5096",
11085                         instance());
11086     SET_ERROR_INSERT_VALUE(5096);
11087   }
11088 
11089   Uint32 tcOprec = signal->theData[0];
11090   BlockReference tcBlockref = signal->theData[1];
11091   Uint32 transid1 = signal->theData[2];
11092   Uint32 transid2 = signal->theData[3];
11093   CRASH_INSERTION(5003);
11094   if (ERROR_INSERTED(5015)) {
11095     CLEAR_ERROR_INSERT_VALUE;
11096     sendSignalWithDelay(cownref, GSN_ABORT, signal, 2000, 4);
11097     return;
11098   }//if
11099   TcConnectionrecPtr tcConnectptr;
11100   if (findTransaction(transid1,
11101                       transid2,
11102                       tcOprec, 0,
11103                       tcConnectptr) != ZOK)
11104   {
11105     jam();
11106 
11107     if(ERROR_INSERTED(5039) &&
11108        refToNode(signal->getSendersBlockRef()) != getOwnNodeId()){
11109       jam();
11110       SET_ERROR_INSERT_VALUE(5040);
11111       return;
11112     }
11113 
11114     if(ERROR_INSERTED(5040) &&
11115        refToNode(signal->getSendersBlockRef()) != getOwnNodeId()){
11116       jam();
11117       SET_ERROR_INSERT_VALUE(5003);
11118       return;
11119     }
11120 
11121 /* ------------------------------------------------------------------------- */
11122 // SEND ABORTED EVEN IF NOT FOUND.
11123 //THE TRANSACTION MIGHT NEVER HAVE ARRIVED HERE.
11124 /* ------------------------------------------------------------------------- */
11125     signal->theData[0] = tcOprec;
11126     signal->theData[1] = transid1;
11127     signal->theData[2] = transid2;
11128     signal->theData[3] = cownNodeid;
11129     signal->theData[4] = ZTRUE;
11130     sendSignal(tcBlockref, GSN_ABORTED, signal, 5, JBB);
11131     warningReport(signal, 8);
11132     return;
11133   }//if
11134 
11135   TcConnectionrec * const regTcPtr = tcConnectptr.p;
11136   if (ERROR_INSERTED(5100))
11137   {
11138     SET_ERROR_INSERT_VALUE(5101);
11139     return;
11140   }
11141   CRASH_INSERTION2(5101, regTcPtr->nextReplica != ZNIL);
11142 
11143 /* ------------------------------------------------------------------------- */
11144 /*A GUIDING DESIGN PRINCIPLE IN HANDLING THESE ERROR SITUATIONS HAVE BEEN    */
11145 /*KEEP IT SIMPLE. THUS WE RATHER INSERT A WAIT AND SET THE ABORT_STATE TO    */
11146 /*ACTIVE RATHER THAN WRITE NEW CODE TO HANDLE EVERY SPECIAL SITUATION.       */
11147 /* ------------------------------------------------------------------------- */
11148   if (regTcPtr->nextReplica != ZNIL) {
11149 /* ------------------------------------------------------------------------- */
11150 // We will immediately send the ABORT message also to the next LQH node in line.
11151 /* ------------------------------------------------------------------------- */
11152     FragrecordPtr Tfragptr;
11153     Tfragptr.i = regTcPtr->fragmentptr;
11154     c_fragment_pool.getPtr(Tfragptr);
11155     Uint32 Tnode = regTcPtr->nextReplica;
11156     Uint32 instanceKey = Tfragptr.p->lqhInstanceKey;
11157     BlockReference TLqhRef = numberToRef(DBLQH, instanceKey, Tnode);
11158     signal->theData[0] = regTcPtr->tcOprec;
11159     signal->theData[1] = regTcPtr->tcBlockref;
11160     signal->theData[2] = regTcPtr->transid[0];
11161     signal->theData[3] = regTcPtr->transid[1];
11162     sendSignal(TLqhRef, GSN_ABORT, signal, 4, JBB);
11163   }//if
11164   regTcPtr->abortState = TcConnectionrec::ABORT_FROM_TC;
11165 
11166   remove_commit_marker(regTcPtr);
11167   TRACE_OP(regTcPtr, "ABORT");
11168 
11169   abortStateHandlerLab(signal, tcConnectptr);
11170 
11171   return;
11172 }//Dblqh::execABORT()
11173 
11174 /* ************************************************************************>>
11175  *  ABORTREQ: Same as ABORT but used in case one node isn't working ok.
11176  *  (See COMMITREQ)
11177  * ************************************************************************>> */
execABORTREQ(Signal * signal)11178 void Dblqh::execABORTREQ(Signal* signal)
11179 {
11180   jamEntry();
11181   Uint32 reqPtr = signal->theData[0];
11182   BlockReference reqBlockref = signal->theData[1];
11183   Uint32 transid1 = signal->theData[2];
11184   Uint32 transid2 = signal->theData[3];
11185   Uint32 tcOprec = signal->theData[5];
11186   if (ERROR_INSERTED(5006)) {
11187     systemErrorLab(signal, __LINE__);
11188   }
11189   if (ERROR_INSERTED(5016)) {
11190     CLEAR_ERROR_INSERT_VALUE;
11191     sendSignalWithDelay(cownref, GSN_ABORTREQ, signal, 2000, 6);
11192     return;
11193   }//if
11194   TcConnectionrecPtr tcConnectptr;
11195   if (findTransaction(transid1,
11196                       transid2,
11197                       tcOprec, 0,
11198                       tcConnectptr) != ZOK)
11199   {
11200     signal->theData[0] = reqPtr;
11201     signal->theData[2] = cownNodeid;
11202     signal->theData[3] = transid1;
11203     signal->theData[4] = transid2;
11204     sendSignal(reqBlockref, GSN_ABORTCONF, signal, 5, JBB);
11205     warningReport(signal, 9);
11206     return;
11207   }//if
11208   TcConnectionrec * const regTcPtr = tcConnectptr.p;
11209   if (unlikely(regTcPtr->transactionState != TcConnectionrec::PREPARED))
11210   {
11211     jam();
11212     warningReport(signal, 10);
11213     return;
11214   }//if
11215   regTcPtr->reqBlockref = reqBlockref;
11216   regTcPtr->reqRef = reqPtr;
11217   regTcPtr->abortState = TcConnectionrec::REQ_FROM_TC;
11218 
11219   abortCommonLab(signal, tcConnectptr);
11220   return;
11221 }//Dblqh::execABORTREQ()
11222 
11223 /* ************>> */
11224 /*  ACC_TO_REF  > */
11225 /* ************>> */
execACC_TO_REF(Signal * signal,const TcConnectionrecPtr tcConnectptr)11226 void Dblqh::execACC_TO_REF(Signal* signal,
11227                            const TcConnectionrecPtr tcConnectptr)
11228 {
11229   jamEntry();
11230   terrorCode = signal->theData[1];
11231   abortErrorLab(signal, tcConnectptr);
11232   return;
11233 }//Dblqh::execACC_TO_REF()
11234 
11235 /* ************> */
11236 /*  ACCKEYREF  > */
11237 /* ************> */
execACCKEYREF(Signal * signal)11238 void Dblqh::execACCKEYREF(Signal* signal)
11239 {
11240   jamEntry();
11241   TcConnectionrecPtr tcConnectptr;
11242   tcConnectptr.i = signal->theData[0];
11243   terrorCode = signal->theData[1];
11244   ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
11245   TcConnectionrec * const tcPtr = tcConnectptr.p;
11246   switch (tcPtr->transactionState) {
11247   case TcConnectionrec::WAIT_ACC:
11248     jam();
11249     c_fragment_pool.getPtr(tcPtr->fragmentptr)->m_useStat.m_keyRefCount++;
11250     break;
11251   case TcConnectionrec::WAIT_ACC_ABORT:
11252   case TcConnectionrec::ABORT_QUEUED:
11253     jam();
11254 /* ------------------------------------------------------------------------- */
11255 /*IGNORE SINCE ABORT OF THIS OPERATION IS ONGOING ALREADY.                   */
11256 /* ------------------------------------------------------------------------- */
11257     return;
11258     break;
11259   default:
11260     ndbabort();
11261   }//switch
11262   const Uint32 errCode = terrorCode;
11263   tcPtr->errorCode = errCode;
11264 
11265   if (TRACENR_FLAG)
11266   {
11267     TRACENR("ACCKEYREF: " << errCode << " ");
11268     switch (tcPtr->operation) {
11269     case ZREAD: TRACENR("READ"); break;
11270     case ZUPDATE: TRACENR("UPDATE"); break;
11271     case ZWRITE: TRACENR("WRITE"); break;
11272     case ZINSERT: TRACENR("INSERT"); break;
11273     case ZDELETE: TRACENR("DELETE"); break;
11274     case ZUNLOCK: TRACENR("UNLOCK"); break;
11275     default: TRACENR("<Unknown: " << tcPtr->operation << ">"); break;
11276     }
11277 
11278     TRACENR(" tab: " << tcPtr->tableref
11279 	   << " frag: " << tcPtr->fragmentid
11280 	   << " activeCreat: " << (Uint32)tcPtr->activeCreat);
11281     if (LqhKeyReq::getNrCopyFlag(tcPtr->reqinfo))
11282       TRACENR(" NrCopy");
11283     if (LqhKeyReq::getRowidFlag(tcPtr->reqinfo))
11284       TRACENR(" rowid: " << tcPtr->m_row_id);
11285     TRACENR(" key: " << getKeyInfoWordOrZero(tcPtr, 0));
11286     TRACENR(endl);
11287 
11288   }
11289 
11290   ndbrequire(tcPtr->activeCreat == Fragrecord::AC_NORMAL);
11291   ndbrequire(!LqhKeyReq::getNrCopyFlag(tcPtr->reqinfo));
11292 
11293   /**
11294    * Not only primary replica can get ZTUPLE_ALREADY_EXIST || ZNO_TUPLE_FOUND
11295    *
11296    * 1) op1 - primary insert ok
11297    * 2) op1 - backup insert fail (log full or what ever)
11298    * 3) op1 - delete ok @ primary
11299    * 4) op1 - delete fail @ backup
11300    *
11301    * -> ZNO_TUPLE_FOUND is possible
11302    *
11303    * 1) op1 primary delete ok
11304    * 2) op1 backup delete fail (log full or what ever)
11305    * 3) op2 insert ok @ primary
11306    * 4) op2 insert fail @ backup
11307    *
11308    * -> ZTUPLE_ALREADY_EXIST
11309    */
11310   tcPtr->abortState = TcConnectionrec::ABORT_FROM_LQH;
11311   abortCommonLab(signal, tcConnectptr);
11312   return;
11313 }//Dblqh::execACCKEYREF()
11314 
localAbortStateHandlerLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)11315 void Dblqh::localAbortStateHandlerLab(Signal* signal,
11316                                       const TcConnectionrecPtr tcConnectptr)
11317 {
11318   TcConnectionrec * const regTcPtr = tcConnectptr.p;
11319   if (regTcPtr->abortState != TcConnectionrec::ABORT_IDLE) {
11320     jam();
11321     return;
11322   }//if
11323   regTcPtr->abortState = TcConnectionrec::ABORT_FROM_LQH;
11324   regTcPtr->errorCode = terrorCode;
11325   abortStateHandlerLab(signal, tcConnectptr);
11326   return;
11327 }//Dblqh::localAbortStateHandlerLab()
11328 
abortStateHandlerLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)11329 void Dblqh::abortStateHandlerLab(Signal* signal,
11330                                  const TcConnectionrecPtr tcConnectptr)
11331 {
11332   TcConnectionrec * const regTcPtr = tcConnectptr.p;
11333   switch (regTcPtr->transactionState) {
11334   case TcConnectionrec::PREPARED:
11335     jam();
11336 /* ------------------------------------------------------------------------- */
11337 /*THE OPERATION IS ALREADY PREPARED AND SENT TO THE NEXT LQH OR BACK TO TC.  */
11338 /*WE CAN SIMPLY CONTINUE WITH THE ABORT PROCESS.                             */
11339 /*IF IT WAS A CHECK FOR TRANSACTION STATUS THEN WE REPORT THE STATUS TO THE  */
11340 /*NEW TC AND CONTINUE WITH THE NEXT OPERATION IN LQH.                        */
11341 /* ------------------------------------------------------------------------- */
11342     if (regTcPtr->abortState == TcConnectionrec::NEW_FROM_TC) {
11343       jam();
11344       sendLqhTransconf(signal, LqhTransConf::Prepared, tcConnectptr);
11345       return;
11346     }//if
11347     break;
11348   case TcConnectionrec::LOG_COMMIT_WRITTEN_WAIT_SIGNAL:
11349   case TcConnectionrec::LOG_COMMIT_QUEUED_WAIT_SIGNAL:
11350     jam();
11351 /* ------------------------------------------------------------------------- */
11352 // We can only reach these states for multi-updates on a record in a transaction.
11353 // We know that at least one of those has received the COMMIT signal, thus we
11354 // declare us only prepared since we then receive the expected COMMIT signal.
11355 /* ------------------------------------------------------------------------- */
11356     ndbrequire(regTcPtr->abortState == TcConnectionrec::NEW_FROM_TC);
11357     sendLqhTransconf(signal, LqhTransConf::Prepared, tcConnectptr);
11358     return;
11359   case TcConnectionrec::WAIT_TUPKEYINFO:
11360   case TcConnectionrec::WAIT_ATTR:
11361     jam();
11362 /* ------------------------------------------------------------------------- */
11363 /* WE ARE CURRENTLY WAITING FOR MORE INFORMATION. WE CAN START THE ABORT     */
11364 /* PROCESS IMMEDIATELY. THE KEYINFO AND ATTRINFO SIGNALS WILL BE DROPPED     */
11365 /* SINCE THE ABORT STATE WILL BE SET.                                        */
11366 /* ------------------------------------------------------------------------- */
11367     break;
11368   case TcConnectionrec::WAIT_TUP:
11369     jam();
11370 /* ------------------------------------------------------------------------- */
11371 // TUP is currently active. We have to wait for the TUPKEYREF or TUPKEYCONF
11372 // to arrive since we might otherwise jeopardise the local checkpoint
11373 // consistency in overload situations.
11374 /* ------------------------------------------------------------------------- */
11375     regTcPtr->transactionState = TcConnectionrec::WAIT_TUP_TO_ABORT;
11376     DEB_COPY(("(%u)transactionState(%u) set to WAIT_TUP_TO_ABORT,"
11377               " abortState: %u",
11378               instance(),
11379               tcConnectptr.i,
11380               regTcPtr->abortState));
11381     return;
11382   case TcConnectionrec::WAIT_ACC:
11383     jam();
11384     abortContinueAfterBlockedLab(signal, regTcPtr);
11385     return;
11386     break;
11387   case TcConnectionrec::LOG_QUEUED:
11388     jam();
11389     remove_from_prepare_log_queue(signal, tcConnectptr);
11390     break;
11391   case TcConnectionrec::WAIT_AI_AFTER_ABORT:
11392     jam();
11393 /* ------------------------------------------------------------------------- */
11394 /* ABORT OF ACC AND TUP ALREADY COMPLETED. THIS STATE IS ONLY USED WHEN      */
11395 /* CREATING A NEW FRAGMENT.                                                  */
11396 /* ------------------------------------------------------------------------- */
11397     continueAbortLab(signal, tcConnectptr);
11398     return;
11399     break;
11400   case TcConnectionrec::WAIT_TUP_TO_ABORT:
11401   case TcConnectionrec::LOG_ABORT_QUEUED:
11402   case TcConnectionrec::WAIT_ACC_ABORT:
11403   case TcConnectionrec::ABORT_QUEUED:
11404     jam();
11405 /* ------------------------------------------------------------------------- */
11406 /*ABORT IS ALREADY ONGOING DUE TO SOME ERROR. WE HAVE ALREADY SET THE STATE  */
11407 /*OF THE ABORT SO THAT WE KNOW THAT TC EXPECTS A REPORT. WE CAN THUS SIMPLY  */
11408 /*EXIT.                                                                      */
11409 /* ------------------------------------------------------------------------- */
11410     return;
11411     break;
11412   case TcConnectionrec::WAIT_TUP_COMMIT:
11413   case TcConnectionrec::LOG_COMMIT_QUEUED:
11414   case TcConnectionrec::COMMIT_QUEUED:
11415     jam();
11416 /* ------------------------------------------------------------------------- */
11417 /*THIS IS ONLY AN ALLOWED STATE IF A DIRTY WRITE OR SIMPLE READ IS PERFORMED.*/
11418 /*IF WE ARE MERELY CHECKING THE TRANSACTION STATE IT IS ALSO AN ALLOWED STATE*/
11419 /* ------------------------------------------------------------------------- */
11420     if (regTcPtr->dirtyOp == ZTRUE) {
11421       jam();
11422 /* ------------------------------------------------------------------------- */
11423 /*COMPLETE THE DIRTY WRITE AND THEN REPORT COMPLETED BACK TO TC. SINCE IT IS */
11424 /*A DIRTY WRITE IT IS ALLOWED TO COMMIT EVEN IF THE TRANSACTION ABORTS.      */
11425 /* ------------------------------------------------------------------------- */
11426       return;
11427     }//if
11428     if (regTcPtr->opSimple) {
11429       jam();
11430 /* ------------------------------------------------------------------------- */
11431 /*A SIMPLE READ IS CURRENTLY RELEASING THE LOCKS OR WAITING FOR ACCESS TO    */
11432 /*ACC TO CLEAR THE LOCKS. COMPLETE THIS PROCESS AND THEN RETURN AS NORMAL.   */
11433 /*NO DATA HAS CHANGED DUE TO THIS SIMPLE READ ANYWAY.                        */
11434 /* ------------------------------------------------------------------------- */
11435       return;
11436     }//if
11437     ndbrequire(regTcPtr->abortState == TcConnectionrec::NEW_FROM_TC);
11438     jam();
11439 /* ------------------------------------------------------------------------- */
11440 /*WE ARE ONLY CHECKING THE STATUS OF THE TRANSACTION. IT IS COMMITTING.      */
11441 /*COMPLETE THE COMMIT LOCALLY AND THEN SEND REPORT OF COMMITTED TO THE NEW TC*/
11442 /* ------------------------------------------------------------------------- */
11443     sendLqhTransconf(signal, LqhTransConf::Committed, tcConnectptr);
11444     return;
11445     break;
11446   case TcConnectionrec::COMMITTED:
11447     jam();
11448     ndbrequire(regTcPtr->abortState == TcConnectionrec::NEW_FROM_TC);
11449 /* ------------------------------------------------------------------------- */
11450 /*WE ARE CHECKING TRANSACTION STATUS. REPORT COMMITTED AND CONTINUE WITH THE */
11451 /*NEXT OPERATION.                                                            */
11452 /* ------------------------------------------------------------------------- */
11453     sendLqhTransconf(signal, LqhTransConf::Committed, tcConnectptr);
11454     return;
11455     break;
11456   default:
11457     ndbabort();
11458 /* ------------------------------------------------------------------------- */
11459 /*THE STATE WAS NOT AN ALLOWED STATE ON A NORMAL OPERATION. SCANS AND COPY   */
11460 /*FRAGMENT OPERATIONS SHOULD HAVE EXECUTED IN ANOTHER PATH.                  */
11461 /* ------------------------------------------------------------------------- */
11462   }//switch
11463   abortCommonLab(signal, tcConnectptr);
11464   return;
11465 }//Dblqh::abortStateHandlerLab()
11466 
abortErrorLab(Signal * signal,TcConnectionrecPtr tcConnectptr)11467 void Dblqh::abortErrorLab(Signal* signal, TcConnectionrecPtr tcConnectptr)
11468 {
11469   ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
11470   TcConnectionrec * const regTcPtr = tcConnectptr.p;
11471   if (regTcPtr->abortState == TcConnectionrec::ABORT_IDLE) {
11472     jam();
11473     regTcPtr->abortState = TcConnectionrec::ABORT_FROM_LQH;
11474     regTcPtr->errorCode = terrorCode;
11475   }//if
11476   abortCommonLab(signal, tcConnectptr);
11477   return;
11478 }//Dblqh::abortErrorLab()
11479 
abortCommonLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)11480 void Dblqh::abortCommonLab(Signal* signal,
11481                            const TcConnectionrecPtr tcConnectptr)
11482 {
11483   TcConnectionrec * const regTcPtr = tcConnectptr.p;
11484   const Uint32 activeCreat = regTcPtr->activeCreat;
11485 
11486   remove_commit_marker(regTcPtr);
11487 
11488   if (unlikely(activeCreat == Fragrecord::AC_NR_COPY))
11489   {
11490     jam();
11491     if (regTcPtr->m_nr_delete.m_cnt)
11492     {
11493       jam();
11494       /**
11495        * Let operation wait for pending NR operations
11496        */
11497      DEB_COPY(("(%u)Blocked in abortCommonLab for %u",
11498                instance(),
11499                tcConnectptr.i));
11500 #ifdef VM_TRACE
11501       /**
11502        * Only disk table can have pending ops...
11503        */
11504       TablerecPtr tablePtr;
11505       tablePtr.i = regTcPtr->tableref;
11506       ptrCheckGuard(tablePtr, ctabrecFileSize, tablerec);
11507       ndbrequire(tablePtr.p->m_disk_table);
11508 #endif
11509       return;
11510     }
11511   }
11512 
11513   fragptr.i = regTcPtr->fragmentptr;
11514   if (fragptr.i != RNIL) {
11515     jam();
11516     c_fragment_pool.getPtr(fragptr);
11517     switch (fragptr.p->fragStatus) {
11518     case Fragrecord::FSACTIVE:
11519     case Fragrecord::CRASH_RECOVERING:
11520     case Fragrecord::ACTIVE_CREATION:
11521       abortContinueAfterBlockedLab(signal, regTcPtr);
11522       return;
11523       break;
11524     case Fragrecord::FREE:
11525       ndbabort();
11526     case Fragrecord::DEFINED:
11527       ndbabort();
11528     case Fragrecord::REMOVING:
11529       ndbabort();
11530     default:
11531       ndbabort();
11532     }//switch
11533   } else {
11534     jam();
11535     continueAbortLab(signal, tcConnectptr);
11536   }//if
11537 }//Dblqh::abortCommonLab()
11538 
abortContinueAfterBlockedLab(Signal * signal,TcConnectionrec * regTcPtr)11539 void Dblqh::abortContinueAfterBlockedLab(Signal* signal,
11540                                          TcConnectionrec* regTcPtr)
11541 {
11542   /* ------------------------------------------------------------------------
11543    *       INPUT:          TC_CONNECTPTR           ACTIVE OPERATION RECORD
11544    * ------------------------------------------------------------------------
11545    * ------------------------------------------------------------------------
11546    *       CAN COME HERE AS RESTART AFTER BEING BLOCKED BY A LOCAL CHECKPOINT.
11547    * ------------------------------------------------------------------------
11548    *       ALSO AS PART OF A NORMAL ABORT WITHOUT BLOCKING.
11549    *       WE MUST ABORT TUP BEFORE ACC TO ENSURE THAT NO ONE RACES IN
11550    *       AND SEES A STATE IN TUP.
11551    * ----------------------------------------------------------------------- */
11552   TRACE_OP(regTcPtr, "ACC ABORT");
11553   Uint32 canBlock = 2; // 2, block if needed
11554   switch(regTcPtr->transactionState){
11555   case TcConnectionrec::WAIT_TUP:
11556     jam();
11557     /**
11558      * This is when getting from execTUPKEYREF
11559      *   in which case we *do* have ACC lock
11560      *   and should not (need to) block
11561      */
11562     canBlock = 0;
11563     break;
11564   default:
11565     break;
11566   }
11567 
11568   regTcPtr->transactionState = TcConnectionrec::WAIT_ACC_ABORT;
11569   c_acc->execACC_ABORTREQ(signal,
11570                           regTcPtr->accConnectrec,
11571                           regTcPtr->accConnectPtrP,
11572                           canBlock);
11573 
11574   if (signal->theData[1] == RNIL)
11575   {
11576     jam();
11577     /* ------------------------------------------------------------------------
11578      * We need to insert a real-time break by sending ACC_ABORTCONF through the
11579      * job buffer to ensure that we catch any ACCKEYCONF or TUPKEYCONF or
11580      * TUPKEYREF that are in the job buffer but not yet processed. Doing
11581      * everything without that would race and create a state error when they
11582      * are executed.
11583      * --------------------------------------------------------------------- */
11584     return;
11585   }
11586 
11587   execACC_ABORTCONF(signal);
11588   return;
11589 }//Dblqh::abortContinueAfterBlockedLab()
11590 
11591 /* ******************>> */
11592 /*  ACC_ABORTCONF     > */
11593 /* ******************>> */
execACC_ABORTCONF(Signal * signal)11594 void Dblqh::execACC_ABORTCONF(Signal* signal)
11595 {
11596   jamEntry();
11597   TcConnectionrecPtr tcConnectptr;
11598   tcConnectptr.i = signal->theData[0];
11599   ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
11600   TcConnectionrec * const regTcPtr = tcConnectptr.p;
11601   ndbrequire(regTcPtr->transactionState == TcConnectionrec::WAIT_ACC_ABORT);
11602 
11603   TRACE_OP(regTcPtr, "ACC_ABORTCONF");
11604   signal->theData[0] = regTcPtr->tupConnectrec;
11605   EXECUTE_DIRECT(DBTUP, GSN_TUP_ABORTREQ, signal, 1);
11606 
11607   jamEntry();
11608   continueAbortLab(signal, tcConnectptr);
11609   return;
11610 }//Dblqh::execACC_ABORTCONF()
11611 
continueAbortLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)11612 void Dblqh::continueAbortLab(Signal* signal,
11613                              const TcConnectionrecPtr tcConnectptr)
11614 {
11615   TcConnectionrec * const regTcPtr = tcConnectptr.p;
11616   /* ------------------------------------------------------------------------
11617    *  AN ERROR OCCURED IN THE ACTIVE CREATION AFTER THE ABORT PHASE.
11618    *  WE NEED TO CONTINUE WITH A NORMAL ABORT.
11619    * ------------------------------------------------------------------------
11620    *       ALSO USED FOR NORMAL CLEAN UP AFTER A NORMAL ABORT.
11621    * ------------------------------------------------------------------------
11622    *       ALSO USED WHEN NO FRAGMENT WAS SET UP ON OPERATION.
11623    * ------------------------------------------------------------------------ */
11624   if (regTcPtr->logWriteState == TcConnectionrec::WRITTEN) {
11625     jam();
11626     /* ----------------------------------------------------------------------
11627      * I NEED TO INSERT A ABORT LOG RECORD SINCE WE ARE WRITING LOG IN THIS
11628      * TRANSACTION.
11629      * ---------------------------------------------------------------------- */
11630     initLogPointers(signal, tcConnectptr);
11631     if (cnoOfLogPages == 0 ||
11632         !logPartPtr.p->m_log_complete_queue.isEmpty())
11633     {
11634       jam();
11635       /* --------------------------------------------------------------------
11636        * A PREPARE OPERATION IS CURRENTLY WRITING IN THE LOG.
11637        * WE MUST WAIT ON OUR TURN TO WRITE THE LOG.
11638        * IT IS NECESSARY TO WRITE ONE LOG RECORD COMPLETELY
11639        * AT A TIME OTHERWISE WE WILL SCRAMBLE THE LOG.
11640        * -------------------------------------------------------------------- */
11641       linkWaitLog(signal,
11642                   logPartPtr,
11643                   logPartPtr.p->m_log_complete_queue,
11644                   tcConnectptr);
11645       regTcPtr->transactionState = TcConnectionrec::LOG_ABORT_QUEUED;
11646       return;
11647     }//if
11648     writeAbortLog(signal, tcConnectptr.p, logPartPtr.p);
11649     removeLogTcrec(signal, tcConnectptr);
11650   } else if (regTcPtr->logWriteState == TcConnectionrec::NOT_STARTED) {
11651     jam();
11652   } else if (regTcPtr->logWriteState == TcConnectionrec::NOT_WRITTEN) {
11653     jam();
11654     /* ------------------------------------------------------------------
11655      * IT IS A READ OPERATION OR OTHER OPERATION THAT DO NOT USE THE LOG.
11656      * ------------------------------------------------------------------ */
11657     /* ------------------------------------------------------------------
11658      * THE LOG HAS NOT BEEN WRITTEN SINCE THE LOG FLAG WAS FALSE.
11659      * THIS CAN OCCUR WHEN WE ARE STARTING A NEW FRAGMENT.
11660      * ------------------------------------------------------------------ */
11661     regTcPtr->logWriteState = TcConnectionrec::NOT_STARTED;
11662   } else {
11663     ndbrequire(regTcPtr->logWriteState == TcConnectionrec::NOT_WRITTEN_WAIT);
11664     jam();
11665     /* ----------------------------------------------------------------
11666      * THE STATE WAS SET TO NOT_WRITTEN BY THE OPERATION BUT LATER
11667      * A SCAN OF ALL OPERATION RECORD CHANGED IT INTO NOT_WRITTEN_WAIT.
11668      * THIS INDICATES THAT WE ARE WAITING FOR THIS OPERATION TO COMMIT
11669      * OR ABORT SO THAT WE CAN FIND THE
11670      * STARTING GLOBAL CHECKPOINT OF THIS NEW FRAGMENT.
11671      * ---------------------------------------------------------------- */
11672      checkScanTcCompleted(signal, tcConnectptr);
11673   }//if
11674   continueAfterLogAbortWriteLab(signal, tcConnectptr);
11675   return;
11676 }//Dblqh::continueAbortLab()
11677 
continueAfterLogAbortWriteLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)11678 void Dblqh::continueAfterLogAbortWriteLab(
11679                 Signal* signal,
11680                 const TcConnectionrecPtr tcConnectptr)
11681 {
11682   TcConnectionrec * const regTcPtr = tcConnectptr.p;
11683   bool normalProtocol = (regTcPtr->m_flags &
11684                          TcConnectionrec::OP_NORMAL_PROTOCOL);
11685 
11686   remove_commit_marker(regTcPtr);
11687 
11688   if (regTcPtr->operation == ZREAD && regTcPtr->dirtyOp &&
11689       !normalProtocol)
11690   {
11691     jam();
11692     TcKeyRef * const tcKeyRef = (TcKeyRef *) signal->getDataPtrSend();
11693 
11694     tcKeyRef->connectPtr = regTcPtr->applOprec;
11695     tcKeyRef->transId[0] = regTcPtr->transid[0];
11696     tcKeyRef->transId[1] = regTcPtr->transid[1];
11697     tcKeyRef->errorCode = regTcPtr->errorCode;
11698     sendTCKEYREF(signal, regTcPtr->applRef, regTcPtr->tcBlockref, 0);
11699     cleanUp(signal, tcConnectptr);
11700     return;
11701   }//if
11702   if (regTcPtr->abortState == TcConnectionrec::ABORT_FROM_LQH) {
11703     LqhKeyRef * const lqhKeyRef = (LqhKeyRef *)signal->getDataPtrSend();
11704 
11705     jam();
11706     lqhKeyRef->userRef = regTcPtr->clientConnectrec;
11707     lqhKeyRef->connectPtr = regTcPtr->tcOprec;
11708     lqhKeyRef->errorCode = regTcPtr->errorCode;
11709     lqhKeyRef->transId1 = regTcPtr->transid[0];
11710     lqhKeyRef->transId2 = regTcPtr->transid[1];
11711     Uint32 block = refToMain(regTcPtr->clientBlockref);
11712     if (block != RESTORE)
11713     {
11714       sendSignal(regTcPtr->clientBlockref, GSN_LQHKEYREF, signal,
11715                  LqhKeyRef::SignalLength, JBB);
11716     }
11717     else
11718     {
11719       ndbrequire(refToNode(regTcPtr->clientBlockref) == cownNodeid &&
11720                  refToInstance(regTcPtr->clientBlockref) == instance());
11721       EXECUTE_DIRECT(RESTORE, GSN_LQHKEYREF,
11722                      signal, LqhKeyRef::SignalLength);
11723     }
11724   } else if (regTcPtr->abortState == TcConnectionrec::ABORT_FROM_TC) {
11725     jam();
11726     sendAborted(signal, tcConnectptr);
11727   } else if (regTcPtr->abortState == TcConnectionrec::NEW_FROM_TC) {
11728     jam();
11729     sendLqhTransconf(signal, LqhTransConf::Aborted, tcConnectptr);
11730   } else {
11731     ndbrequire(regTcPtr->abortState == TcConnectionrec::REQ_FROM_TC);
11732     jam();
11733     signal->theData[0] = regTcPtr->reqRef;
11734     signal->theData[1] = tcConnectptr.i;
11735     signal->theData[2] = cownNodeid;
11736     signal->theData[3] = regTcPtr->transid[0];
11737     signal->theData[4] = regTcPtr->transid[1];
11738     sendSignal(regTcPtr->reqBlockref, GSN_ABORTCONF,
11739                signal, 5, JBB);
11740   }//if
11741   cleanUp(signal, tcConnectptr);
11742 }//Dblqh::continueAfterLogAbortWriteLab()
11743 
11744 void
sendTCKEYREF(Signal * signal,Uint32 ref,Uint32 routeRef,Uint32 cnt)11745 Dblqh::sendTCKEYREF(Signal* signal, Uint32 ref, Uint32 routeRef, Uint32 cnt)
11746 {
11747   const Uint32 nodeId = refToNode(ref);
11748   const bool connectedToNode = getNodeInfo(nodeId).m_connected;
11749   /**
11750    * ROUTE_ORD signals should not be sent via SPJ as it does not handle these
11751    * and (unlike TC) may not be connected to the API anyway.
11752    */
11753   ndbrequire(routeRef == 0 ||
11754              nodeId == getOwnNodeId() ||
11755              refToMain(routeRef) == DBTC);
11756 
11757   if (likely(connectedToNode &&
11758              !ERROR_INSERTED_CLEAR(5079)))
11759   {
11760     jam();
11761     sendSignal(ref, GSN_TCKEYREF, signal, TcKeyRef::SignalLength, JBB);
11762   }
11763   else
11764   {
11765     if (routeRef &&
11766 	getNodeInfo(refToNode(routeRef)).m_version >= MAKE_VERSION(5,1,14))
11767     {
11768       jam();
11769       memmove(signal->theData+25, signal->theData, 4*TcKeyRef::SignalLength);
11770       RouteOrd* ord = (RouteOrd*)signal->getDataPtrSend();
11771       ord->dstRef = ref;
11772       ord->srcRef = reference();
11773       ord->gsn = GSN_TCKEYREF;
11774       ord->cnt = 0;
11775       LinearSectionPtr ptr[3];
11776       ptr[0].p = signal->theData+25;
11777       ptr[0].sz = TcKeyRef::SignalLength;
11778       sendSignal(routeRef, GSN_ROUTE_ORD, signal, RouteOrd::SignalLength, JBB,
11779 		 ptr, 1);
11780     }
11781     else
11782     {
11783       jam();
11784       memmove(signal->theData + 3, signal->theData, 4*TcKeyRef::SignalLength);
11785       signal->theData[0] = ZRETRY_TCKEYREF;
11786       signal->theData[1] = cnt + 1;
11787       signal->theData[2] = ref;
11788       sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 100,
11789 			  TcKeyRef::SignalLength + 3);
11790     }
11791   }
11792 }
11793 
11794 /* ##########################################################################
11795  * #######                       MODULE TO HANDLE TC FAILURE          #######
11796  *
11797  * ########################################################################## */
11798 
11799 /* ************************************************************************>>
11800  *  NODE_FAILREP: Node failure report. Sender Ndbcntr. Set status of failed
11801  *  node to down and reply with NF_COMPLETEREP to DIH which will report that
11802  *  LQH has completed failure handling.
11803  * ************************************************************************>> */
execNODE_FAILREP(Signal * signal)11804 void Dblqh::execNODE_FAILREP(Signal* signal)
11805 {
11806   UintR TfoundNodes = 0;
11807   UintR TnoOfNodes;
11808   UintR Tdata[MAX_NDB_NODES];
11809   Uint32 i;
11810 
11811   NodeFailRep * const nodeFail = (NodeFailRep *)&signal->theData[0];
11812 
11813   if(signal->getLength() == NodeFailRep::SignalLength)
11814   {
11815     jam();
11816     ndbrequire(signal->getNoOfSections() == 1);
11817     ndbrequire(getNodeInfo(refToNode(signal->getSendersBlockRef())).m_version);
11818     SegmentedSectionPtr ptr;
11819     SectionHandle handle(this, signal);
11820     handle.getSection(ptr, 0);
11821     memset(nodeFail->theNodes, 0, sizeof(nodeFail->theNodes));
11822     copy(nodeFail->theNodes, ptr);
11823     releaseSections(handle);
11824   }
11825   else
11826   {
11827     memset(nodeFail->theNodes + NdbNodeBitmask48::Size, 0,
11828            _NDB_NBM_DIFF_BYTES);
11829   }
11830   TnoOfNodes = nodeFail->noOfNodes;
11831   UintR index = 0;
11832   for (i = 1; i < MAX_NDB_NODES; i++) {
11833     jam();
11834     if(NdbNodeBitmask::get(nodeFail->theNodes, i)){
11835       jam();
11836       Tdata[index] = i;
11837       index++;
11838     }//if
11839   }//for
11840 
11841 #ifdef ERROR_INSERT
11842   c_master_node_id = nodeFail->masterNodeId;
11843 #endif
11844 
11845   lcpPtr.i = 0;
11846   ptrAss(lcpPtr, lcpRecord);
11847 
11848   ndbrequire(index == TnoOfNodes);
11849   ndbrequire(cnoOfNodes - 1 < MAX_NDB_NODES);
11850   for (i = 0; i < TnoOfNodes; i++) {
11851     const Uint32 nodeId = Tdata[i];
11852 
11853     {
11854       HostRecordPtr Thostptr;
11855       Thostptr.i = nodeId;
11856       ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
11857       Thostptr.p->nodestatus = ZNODE_DOWN;
11858     }
11859 
11860     for (Uint32 j = 0; j < cnoOfNodes; j++) {
11861       jam();
11862       if (cnodeData[j] == nodeId){
11863         jam();
11864         cnodeStatus[j] = ZNODE_DOWN;
11865 
11866         TfoundNodes++;
11867       }//if
11868     }//for
11869 
11870     /* Perform block-level ndbd failure handling */
11871     Callback cb = { safe_cast(&Dblqh::ndbdFailBlockCleanupCallback),
11872                     Tdata[i] };
11873     simBlockNodeFailure(signal, Tdata[i], cb);
11874   }//for
11875   ndbrequire(TnoOfNodes == TfoundNodes);
11876 }//Dblqh::execNODE_FAILREP()
11877 
11878 
11879 void
ndbdFailBlockCleanupCallback(Signal * signal,Uint32 failedNodeId,Uint32 ignoredRc)11880 Dblqh::ndbdFailBlockCleanupCallback(Signal* signal,
11881                                     Uint32 failedNodeId,
11882                                     Uint32 ignoredRc)
11883 {
11884   jamEntry();
11885 
11886   NFCompleteRep * const nfCompRep = (NFCompleteRep *)&signal->theData[0];
11887   nfCompRep->blockNo      = DBLQH;
11888   nfCompRep->nodeId       = cownNodeid;
11889   nfCompRep->failedNodeId = failedNodeId;
11890   BlockReference dihRef = !isNdbMtLqh() ? DBDIH_REF : DBLQH_REF;
11891   sendSignal(dihRef, GSN_NF_COMPLETEREP, signal,
11892              NFCompleteRep::SignalLength, JBB);
11893 }
11894 
11895 /* ************************************************************************>>
 *  LQH_TRANSREQ: Report status of all transactions that were coordinated
 *  by a crashed TC
11898  * ************************************************************************>> */
11899 /* ************************************************************************>>
11900  *  THIS SIGNAL IS RECEIVED AFTER A NODE CRASH.
11901  *  THE NODE HAD A TC AND COORDINATED A NUMBER OF TRANSACTIONS.
11902  *  NOW THE MASTER NODE IS PICKING UP THOSE TRANSACTIONS
11903  *  TO COMPLETE THEM. EITHER ABORT THEM OR COMMIT THEM.
11904  * ************************************************************************>> */
execLQH_TRANSREQ(Signal * signal)11905 void Dblqh::execLQH_TRANSREQ(Signal* signal)
11906 {
11907   jamEntry();
11908 
11909   if (!checkNodeFailSequence(signal))
11910   {
11911     jam();
11912     return;
11913   }
11914   LqhTransReq * const lqhTransReq = (LqhTransReq *)&signal->theData[0];
11915   Uint32 newTcPtr = lqhTransReq->senderData;
11916   BlockReference newTcBlockref = lqhTransReq->senderRef;
11917   Uint32 oldNodeId = lqhTransReq->failedNodeId;
11918   Uint32 instanceId = lqhTransReq->instanceId;
11919   if (signal->getLength() < LqhTransReq::SignalLength)
11920   {
11921     /**
11922      * TC that performs take over doesn't suppport taking over one
11923      * TC instance at a time => we read an unitialised variable,
11924      * set it to RNIL to indicate we try take over all instances.
11925      * This code is really only needed in ndbd since ndbmtd handles
11926      * it also in LQH proxy.
11927      */
11928     instanceId = RNIL;
11929   }
11930   TcNodeFailRecordPtr tcNodeFailPtr;
11931   tcNodeFailPtr.i = oldNodeId;
11932   ptrCheckGuard(tcNodeFailPtr, ctcNodeFailrecFileSize, tcNodeFailRecord);
11933   if ((tcNodeFailPtr.p->tcFailStatus == TcNodeFailRecord::TC_STATE_TRUE) ||
11934       (tcNodeFailPtr.p->tcFailStatus == TcNodeFailRecord::TC_STATE_BREAK)) {
11935     jam();
11936     tcNodeFailPtr.p->lastNewTcBlockref = newTcBlockref;
11937   /* ------------------------------------------------------------------------
11938    * WE HAVE RECEIVED A SIGNAL SPECIFYING THAT WE NEED TO HANDLE THE FAILURE
11939    * OF A TC.  NOW WE RECEIVE ANOTHER SIGNAL WITH THE SAME ORDER. THIS CAN
11940    * OCCUR IF THE NEW TC FAILS. WE MUST BE CAREFUL IN THIS CASE SO THAT WE DO
11941    * NOT START PARALLEL ACTIVITIES TRYING TO DO THE SAME THING. WE SAVE THE
11942    * NEW BLOCK REFERENCE TO THE LAST NEW TC IN A VARIABLE AND ASSIGN TO IT TO
11943    * NEW_TC_BLOCKREF WHEN THE OLD PROCESS RETURNS TO LQH_TRANS_NEXT. IT IS
11944    * CERTAIN TO COME THERE SINCE THIS IS THE ONLY PATH TO TAKE CARE OF THE
11945    * NEXT TC CONNECT RECORD. WE SET THE STATUS TO BREAK TO INDICATE TO THE OLD
11946    * PROCESS WHAT IS HAPPENING.
11947    * ------------------------------------------------------------------------ */
11948     tcNodeFailPtr.p->lastNewTcRef = newTcPtr;
11949     tcNodeFailPtr.p->lastTakeOverInstanceId = instanceId;
11950     tcNodeFailPtr.p->tcFailStatus = TcNodeFailRecord::TC_STATE_BREAK;
11951     return;
11952   }//if
11953   tcNodeFailPtr.p->oldNodeId = oldNodeId;
11954   tcNodeFailPtr.p->newTcBlockref = newTcBlockref;
11955   tcNodeFailPtr.p->newTcRef = newTcPtr;
11956   tcNodeFailPtr.p->takeOverInstanceId = instanceId;
11957   tcNodeFailPtr.p->maxInstanceId = 0;
11958   tcNodeFailPtr.p->tcRecNow = 0;
11959   tcNodeFailPtr.p->tcFailStatus = TcNodeFailRecord::TC_STATE_TRUE;
11960   signal->theData[0] = ZLQH_TRANS_NEXT;
11961   signal->theData[1] = tcNodeFailPtr.i;
11962   sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
11963   return;
11964 }//Dblqh::execLQH_TRANSREQ()
11965 
11966 bool
check_tc_and_update_max_instance(BlockReference ref,TcNodeFailRecord * tcNodeFailPtr)11967 Dblqh::check_tc_and_update_max_instance(BlockReference ref,
11968                                         TcNodeFailRecord *tcNodeFailPtr)
11969 {
11970   if (refToNode(ref) == tcNodeFailPtr->oldNodeId)
11971   {
11972     jam();
11973     Uint32 instanceId = refToInstance(ref);
11974     if (instanceId > tcNodeFailPtr->maxInstanceId)
11975     {
11976       /**
11977        * Inform take over TC instance about the maximum instance id
11978        * such that the TC instance knows when to stop the take over
11979        * process.
11980        */
11981       tcNodeFailPtr->maxInstanceId = instanceId;
11982     }
11983     if ((tcNodeFailPtr->takeOverInstanceId == RNIL) ||
11984         (instanceId == tcNodeFailPtr->takeOverInstanceId))
11985     {
11986       jam();
11987       return true;
11988     }
11989   }
11990   jam();
11991   return false;
11992 }
11993 
/**
 * One round of the TC take-over scan over the operation records.
 *
 * If the take-over was interrupted by a newer LQH_TRANSREQ (status
 * TC_STATE_BREAK), the "last" requester is made the active one and the scan
 * restarts from record 0.
 *
 * The loop then steps through operation records with getNextTcConRec(),
 * starting at tcRecNow. For each record that is neither IDLE nor
 * TC_NOT_CONNECTED:
 *  - without a scan record: if check_tc_and_update_max_instance() accepts
 *    its TC reference and it is not a ZUNLOCK or a simple ZREAD, the record
 *    is marked NEW_FROM_TC, abortStateHandlerLab() is called and this
 *    function returns;
 *  - with a scan record: a COPY whose scanNodeId is the failed node is
 *    closed with closeCopyRequestLab(), a SCAN whose TC is on the failed
 *    node is closed with closeScanRequestLab(); in both cases this function
 *    returns.
 * When tcRecNow becomes RNIL the operation scan is done and scanMarkers()
 * takes over. If the loop budget runs out first, a CONTINUEB
 * (ZLQH_TRANS_NEXT) is sent to ourselves to continue later.
 *
 * NOTE(review): after one of the early returns the scan is presumably
 * resumed through ZLQH_TRANS_NEXT by the abort/close path; that is not
 * visible in this function.
 *
 * @param signal         Signal object used for all sends.
 * @param tcNodeFailPtr  Take-over record for the failed node.
 */
void Dblqh::lqhTransNextLab(Signal* signal,
                            TcNodeFailRecordPtr tcNodeFailPtr)
{
  if (tcNodeFailPtr.p->tcFailStatus == TcNodeFailRecord::TC_STATE_BREAK) {
    jam();
    /* ----------------------------------------------------------------------
     *  AN INTERRUPTION TO THIS NODE FAIL HANDLING WAS RECEIVED AND A NEW
     *  TC HAS BEEN ASSIGNED TO TAKE OVER THE FAILED TC. PROBABLY THE OLD
     *  NEW TC HAS FAILED.
     * ---------------------------------------------------------------------- */
    tcNodeFailPtr.p->newTcBlockref = tcNodeFailPtr.p->lastNewTcBlockref;
    tcNodeFailPtr.p->newTcRef = tcNodeFailPtr.p->lastNewTcRef;
    tcNodeFailPtr.p->takeOverInstanceId =
      tcNodeFailPtr.p->lastTakeOverInstanceId;
    tcNodeFailPtr.p->maxInstanceId = 0;
    tcNodeFailPtr.p->tcRecNow = 0;
    tcNodeFailPtr.p->tcFailStatus = TcNodeFailRecord::TC_STATE_TRUE;
  }//if
  TcConnectionrecPtr tcConnectptr;
  tcConnectptr.i = tcNodeFailPtr.p->tcRecNow;
  /* At most 100 loop steps per invocation before a CONTINUEB is sent. */
  for (Uint32 i = 0; i < 100; i++)
  {
    bool found = getNextTcConRec(tcNodeFailPtr.p->tcRecNow,
                                 tcConnectptr,
                                 10);
    if (tcNodeFailPtr.p->tcRecNow != RNIL && !found)
    {
      /**
       * We scanned without finding any records for a long
       * time, thus we will treat this as looping 10 times
       * in this loop (the for-increment is applied on top of this).
       */
      jam();
      i+= 10;
      continue;
    }
    else if (tcNodeFailPtr.p->tcRecNow == RNIL)
    {
      jam();
      /**
       * Finished with scanning operation record
       *
       * now scan markers
       */
#ifdef ERROR_INSERT
      if (ERROR_INSERTED(5061))
      {
        CLEAR_ERROR_INSERT_VALUE;
        /* Note: this 'i' shadows the outer loop counter; the branch returns. */
        for (Uint32 i = 0; i < cnoOfNodes; i++)
        {
          Uint32 node = cnodeData[i];
          if (node != getOwnNodeId() && cnodeStatus[i] == ZNODE_UP)
          {
            ndbout_c("clearing ERROR_INSERT in LQH:%u", node);
            signal->theData[0] = 0;
            sendSignal(numberToRef(DBLQH, node), GSN_NDB_TAMPER,
                       signal, 1, JBB);
          }
        }

        /* Start the marker scan with a delay of 5000 (ms) instead of directly. */
        signal->theData[0] = ZSCAN_MARKERS;
        signal->theData[1] = tcNodeFailPtr.i;
        signal->theData[2] = 0;
        sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 5000, 3);
        return;
      }

      if (ERROR_INSERTED(5050))
      {
        ndbout_c("send ZSCAN_MARKERS with 5s delay and killing master: %u",
                 c_master_node_id);
        CLEAR_ERROR_INSERT_VALUE;
        signal->theData[0] = ZSCAN_MARKERS;
        signal->theData[1] = tcNodeFailPtr.i;
        signal->theData[2] = 0;
        sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 5000, 3);

        /* Error insert 9999 is sent to CMVMI on node c_error_insert_extra. */
        signal->theData[0] = 9999;
        sendSignal(numberToRef(CMVMI, c_error_insert_extra),
                   GSN_NDB_TAMPER, signal, 1, JBB);
        return;
      }
#endif
      scanMarkers(signal, tcNodeFailPtr.i, 0);
      return;
    }//if
    if (tcConnectptr.p->transactionState != TcConnectionrec::IDLE)
    {
      if (tcConnectptr.p->transactionState !=
          TcConnectionrec::TC_NOT_CONNECTED)
      {
        if (tcConnectptr.p->tcScanRec == RNIL)
        {
          /* Key operation (no scan record attached). */
          if (check_tc_and_update_max_instance(tcConnectptr.p->tcBlockref,
                                               tcNodeFailPtr.p))
          {
            /**
             * We send the take over message only for operations that belong
             * to the failed node and also are part of the TC instance that
             * we are currently taking over, instanceId == RNIL means that the
             * signal came from an old version that didn't support multi-TC
             * instance take over. In this case we try to take over all
             * instances in one go.
             */
            switch( tcConnectptr.p->operation )
            {
            case ZUNLOCK :
              jam(); /* Skip over */
              break;
            case ZREAD :
              jam();
              if (tcConnectptr.p->opSimple == ZTRUE)
              {
                jam();
                break; /* Skip over */
              }
              /* Fall through */
            default :
              jam();
              tcConnectptr.p->tcNodeFailrec = tcNodeFailPtr.i;
              tcConnectptr.p->abortState = TcConnectionrec::NEW_FROM_TC;
              abortStateHandlerLab(signal, tcConnectptr);
              return;
            } // switch
          }
        } else {
          /**
           * Scans don't require any keeping of state in TC that takes
           * over, thus we need not handle scans one instance at a time.
           * We can handle all scans immediately. The same goes for copy
           * operations since we can have a very limited number of copy
           * operations ongoing in parallel.
           */
          scanptr.i = tcConnectptr.p->tcScanRec;
          ndbrequire(c_scanRecordPool.getValidPtr(scanptr));
          switch(scanptr.p->scanType){
          case ScanRecord::COPY:
          {
            jam();
            if (scanptr.p->scanNodeId == tcNodeFailPtr.p->oldNodeId) {
              jam();
              /* ------------------------------------------------------------
               * THE RECEIVER OF THE COPY HAS FAILED.
               * WE HAVE TO CLOSE THE COPY PROCESS.
               * ----------------------------------------------------------- */
              if (0) ndbout_c("close copy");
              tcConnectptr.p->tcNodeFailrec = tcNodeFailPtr.i;
              tcConnectptr.p->abortState = TcConnectionrec::NEW_FROM_TC;
              closeCopyRequestLab(signal, tcConnectptr);
              return;
            }
            break;
          }
          case ScanRecord::SCAN:
          {
            jam();
            /* Close the scan if its TC lives on the failed node. */
            if (refToNode(tcConnectptr.p->tcBlockref) ==
                tcNodeFailPtr.p->oldNodeId) {
              jam();
              tcConnectptr.p->tcNodeFailrec = tcNodeFailPtr.i;
              tcConnectptr.p->abortState = TcConnectionrec::NEW_FROM_TC;
              closeScanRequestLab(signal, tcConnectptr, false);
              return;
            }//if
            break;
          }
          default:
            /* Unknown scan type: log the state and crash. */
            ndbout_c("scanptr.p->scanType: %u", scanptr.p->scanType);
            ndbout_c("tcConnectptr.p->transactionState: %u",
                     tcConnectptr.p->transactionState);
            ndbabort();
          }
        }
      }
      else
      {
        /* Debug builds only: a TC_NOT_CONNECTED record must not own a scan. */
#if defined VM_TRACE || defined ERROR_INSERT
        jam();
        ndbrequire(tcConnectptr.p->tcScanRec == RNIL);
#endif
      }
    }
    else
    {
      /* Debug builds only: an IDLE record must not own a scan. */
#if defined VM_TRACE || defined ERROR_INSERT
      jam();
      ndbrequire(tcConnectptr.p->tcScanRec == RNIL);
#endif
    }
  }//for
  /* Loop budget used up: take a real-time break and continue later. */
  signal->theData[0] = ZLQH_TRANS_NEXT;
  signal->theData[1] = tcNodeFailPtr.i;
  sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
  return;
}//Dblqh::lqhTransNextLab()
12189 
12190 void
scanMarkers(Signal * signal,Uint32 tcNodeFail,Uint32 commitAckMarkerPtrI)12191 Dblqh::scanMarkers(Signal* signal,
12192                    Uint32 tcNodeFail,
12193                    Uint32 commitAckMarkerPtrI)
12194 {
12195   jam();
12196 
12197   TcNodeFailRecordPtr tcNodeFailPtr;
12198   tcNodeFailPtr.i = tcNodeFail;
12199   ptrCheckGuard(tcNodeFailPtr, ctcNodeFailrecFileSize, tcNodeFailRecord);
12200 
12201   if (tcNodeFailPtr.p->tcFailStatus == TcNodeFailRecord::TC_STATE_BREAK)
12202   {
12203     jam();
12204 
12205     /* ----------------------------------------------------------------------
12206      *  AN INTERRUPTION TO THIS NODE FAIL HANDLING WAS RECEIVED AND A NEW
12207      *  TC HAVE BEEN ASSIGNED TO TAKE OVER THE FAILED TC. PROBABLY THE OLD
12208      *  NEW TC HAVE FAILED.
12209      * ---------------------------------------------------------------------- */
12210     lqhTransNextLab(signal, tcNodeFailPtr);
12211     return;
12212   }
12213 
12214   const Uint32 RT_BREAK = 128;
12215   for (Uint32 i = 0; i < RT_BREAK; i++)
12216   {
12217     jam();
12218     CommitAckMarkerPtr commitAckMarkerPtr;
12219     bool found = getNextCommitAckMarker(commitAckMarkerPtrI,
12220                                         commitAckMarkerPtr,
12221                                         10);
12222     if (found)
12223     {
12224       ndbrequire(commitAckMarkerPtr.p->in_hash);
12225       jam();
12226       if (check_tc_and_update_max_instance(commitAckMarkerPtr.p->tcRef,
12227                                            tcNodeFailPtr.p))
12228       {
12229         jam();
12230         /**
12231          * Found marker belonging to crashed node and to instance currently
12232          * being handled.
12233          */
12234         LqhTransConf * const lqhTransConf = (LqhTransConf *)&signal->theData[0];
12235         lqhTransConf->tcRef     = tcNodeFailPtr.p->newTcRef;
12236         lqhTransConf->lqhNodeId = cownNodeid;
12237         lqhTransConf->operationStatus = LqhTransConf::Marker;
12238         lqhTransConf->transId1 = commitAckMarkerPtr.p->transid1;
12239         lqhTransConf->transId2 = commitAckMarkerPtr.p->transid2;
12240         lqhTransConf->apiRef   = commitAckMarkerPtr.p->apiRef;
12241         lqhTransConf->apiOpRec = commitAckMarkerPtr.p->apiOprec;
12242         sendSignal(tcNodeFailPtr.p->newTcBlockref, GSN_LQH_TRANSCONF,
12243 		   signal, 7, JBB);
12244 
12245         signal->theData[0] = ZSCAN_MARKERS;
12246         signal->theData[1] = tcNodeFailPtr.i;
12247         signal->theData[2] = commitAckMarkerPtrI;
12248         sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
12249         return;
12250       }
12251     }
12252     if (commitAckMarkerPtrI == RNIL)
12253     {
12254       /**
12255        * Done with iteration
12256        */
12257       jam();
12258 
12259       tcNodeFailPtr.p->tcFailStatus = TcNodeFailRecord::TC_STATE_FALSE;
12260       LqhTransConf * const lqhTransConf = (LqhTransConf *)&signal->theData[0];
12261       lqhTransConf->tcRef     = tcNodeFailPtr.p->newTcRef;
12262       lqhTransConf->lqhNodeId = cownNodeid;
12263       lqhTransConf->operationStatus = LqhTransConf::LastTransConf;
12264       lqhTransConf->maxInstanceId = tcNodeFailPtr.p->maxInstanceId;
12265       sendSignal(tcNodeFailPtr.p->newTcBlockref, GSN_LQH_TRANSCONF,
12266 		 signal, LqhTransConf::SignalLength, JBB);
12267       return;
12268     }
12269     jam();
12270   }
12271 
12272   signal->theData[0] = ZSCAN_MARKERS;
12273   signal->theData[1] = tcNodeFailPtr.i;
12274   signal->theData[2] = commitAckMarkerPtrI;
12275   sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
12276 }
12277 
12278 /* #########################################################################
12279  * #######                       SCAN MODULE                         #######
12280  *
12281  * #########################################################################
12282  * -------------------------------------------------------------------------
12283  * THIS MODULE CONTAINS THE CODE THAT HANDLES A SCAN OF A PARTICULAR FRAGMENT
12284  * IT OPERATES UNDER THE CONTROL OF TC AND ORDERS ACC TO PERFORM A SCAN OF
12285  * ALL TUPLES IN THE FRAGMENT. TUP PERFORMS THE NECESSARY SEARCH CONDITIONS
12286  * TO ENSURE THAT ONLY VALID TUPLES ARE RETURNED TO THE APPLICATION.
12287  * ------------------------------------------------------------------------- */
12288 
12289 /**
12290  *  ACC_SCANCONF obsolete by usage of Direct Execute
12291  *
12292  *  Callee of ACC_SCANREQ will get return value in
12293  *  signal[8] and call accScanConf{Scan|Copy}Lab()
12294  *  directly if OK.
12295  */
12296 
rt_break_is_scan_prioritised(Uint32 scan_ptr_i)12297 Uint32 Dblqh::rt_break_is_scan_prioritised(Uint32 scan_ptr_i)
12298 {
12299   ScanRecordPtr scanPtr;
12300   scanPtr.i = scan_ptr_i;
12301   ndbrequire(c_scanRecordPool.getUncheckedPtrRO(scanPtr));
12302   m_scan_direct_count = 1; /* Initialise before rt break */
12303   bool ret = is_prioritised_scan(scanPtr.p->scanApiBlockref);
12304   ndbrequire(Magic::check_ptr(scanPtr.p));
12305   return ret;
12306 }
12307 
12308 /* ************>> */
12309 /*  ACC_SCANREF > */
12310 /* ************>> */
execACC_SCANREF(Signal * signal,const TcConnectionrecPtr tcConnectptr)12311 void Dblqh::execACC_SCANREF(Signal* signal,
12312                             const TcConnectionrecPtr tcConnectptr)
12313 {
12314   const AccScanRef refCopy = *(const AccScanRef*)signal->getDataPtr();
12315   const AccScanRef* ref = &refCopy;
12316   ndbrequire(ref->errorCode != 0);
12317 
12318   tcConnectptr.p->errorCode = ref->errorCode;
12319 
12320   /*
12321    * MRR scan can hit this between 2 DBTUX scans.  Previous range has
12322    * terminated via last NEXT_SCANCONF, then index is set to Dropping,
12323    * and then next range is started and returns ACC_SCANREF.
12324    */
12325   if (scanptr.p->scanStoredProcId != RNIL) {
12326     jam();
12327     scanptr.p->scanCompletedStatus = ZTRUE;
12328     accScanCloseConfLab(signal, tcConnectptr);
12329     return;
12330   }
12331   tupScanCloseConfLab(signal, tcConnectptr);
12332 }//Dblqh::execACC_SCANREF()
12333 
12334 /* ***************> */
12335 /*  NEXT_SCANREF  > */
12336 /* ***************> */
execNEXT_SCANREF(Signal * signal)12337 void Dblqh::execNEXT_SCANREF(Signal* signal)
12338 {
12339   jamEntry();
12340   ndbrequire(refToMain(signal->getSendersBlockRef()) == DBTUX);
12341   exec_next_scan_ref(signal);
12342 }
12343 
exec_next_scan_ref(Signal * signal)12344 void Dblqh::exec_next_scan_ref(Signal *signal)
12345 {
12346   jamEntry();
12347   const NextScanRef refCopy = *(const NextScanRef*)signal->getDataPtr();
12348   const NextScanRef* ref = &refCopy;
12349   ndbrequire(ref->errorCode != 0);
12350 
12351   scanptr.i = ref->scanPtr;
12352   ndbrequire(c_scanRecordPool.getValidPtr(scanptr));
12353   TcConnectionrecPtr tcConnectptr;
12354   tcConnectptr.i = scanptr.p->scanTcrec;
12355   ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
12356   tcConnectptr.p->errorCode = ref->errorCode;
12357 
12358   /*
12359    * MRR scan may have other ranges left.  But the scan has already
12360    * failed.  Terminate the scan now.
12361    */
12362   scanptr.p->scanCompletedStatus = ZTRUE;
12363   accScanCloseConfLab(signal, tcConnectptr);
12364 }//Dblqh::execNEXT_SCANREF()
12365 
12366 /**
12367  *  ACC_SCANCONF obsolete by usage of Direct Execute
12368  *
12369  *  Callee of ACC_SCANREQ will get return value in
12370  *  signal[8] and call accScanConf{Scan|Copy}Lab()
12371  *  directly if OK.
12372  */
12373 Uint32
get_scan_api_op_ptr(Uint32 scan_api_ptr_i)12374 Dblqh::get_scan_api_op_ptr(Uint32 scan_api_ptr_i)
12375 {
12376   ScanRecordPtr scanPtr;
12377   scanPtr.i = scan_api_ptr_i;
12378   ndbrequire(c_scanRecordPool.getUncheckedPtrRW(scanPtr));
12379   Uint32 apiOpPtr = scanPtr.p->scanApiOpPtr;
12380   ndbrequire(Magic::check_ptr(scanPtr.p));
12381   return apiOpPtr;
12382 }
12383 
12384 /**
12385  * Here we have some code to handle real-time breaks during
12386  * PREPARE of key operations and scanning operations. All
12387  * real-time breaks are handled through signals sent to the
12388  * DBLQH block, it isn't allowed to send an asynchronous
12389  * signal to any other block as part of processing an
12390  * LQHKEYREQ, SCAN_FRAGREQ and SCAN_NEXTREQ.
12391  *
12392  * There is one exception where DBTUP sends a CONTINUEB signal
12393  * to itself when performing a full partition scan,
12394  * as part of handling this method the setup_scan_pointers
12395  * method is called to initialise all variables.
12396  *
12397  * At receive of SCAN_FRAGREQ and LQHKEYREQ we build up the
12398  * block pointers for key operations and scan operations as
12399  * part of setting up the infrastructure to execute the key
12400  * and scan operation.
12401  *
12402  * For key operations there are only two real-time breaks that can
12403  * occur (1 and 2 below); case 3 only concerns deprecated short signals.
12404  * 1) After a row lock wait we return from DBACC in
12405  *    execACCKEYCONF.
12406  * 2) After reading the disk page we return in
12407  *    acckeyconf_load_diskpage_callback.
12408  * 3) It is also used in execATTRINFO and execKEYINFO, these
12409  *    are used with short signals, but these have been
12410  *    deprecated for a long time.
12411  *
12412  * In a scan we have many more places from where we can return
12413  * after a real-time break.
12414  *
12415  * 1) Normal real-time break when a scan must take a break to
12416  *    not overextend its scheduling budget. This returns in
12417  *    execACC_CHECK_SCAN from where we send the NEXT_SCANREQ
12418  *    signal to the scanning block.
12419  * 2) Normal break to communicate with the application.
12420  *    The application will return by sending SCAN_NEXTREQ and
12421  *    thus this will happen in execSCAN_NEXTREQ.
12422  * 3) When we try to access a locked row in DBTUP and DBTUX and
12423  *    when we are waiting for too many rows in DBACC we will
12424  *    send the signal CHECK_LCP_STOP to DBLQH. This will cause
12425  *    1 millisecond sleep and we will return with a CONTINUEB
12426  *    signal in DBLQH that will call the method checkLcpStopBlockedLab.
12427  * 4) When we hit a row lock in the scanning block we will send
12428  *    NEXT_SCANCONF with no rows returned. This signal is sent
12429  *    as a buffered signal. This gives the scan a chance to
12430  *    discover a quick lock release. This real-time break will
12431  *    be started in execNEXT_SCANCONF in DBLQH.
12432  * 5) In various places when we hit a row lock and similar things
12433  *    in DBACC we will send the signal ACC_CHECK_SCAN to DBLQH.
12434  *    DBLQH will send this signal back to DBACC after the real-time
12435  *    break. This is handled in execACC_CHECK_SCAN in DBLQH.
12436  * 6) When performing a long scan in DBTUP (normally an LCP scan),
12437  *    we can hit a limit on how many pages we are allowed to scan
12438  *    before taking a real-time break. This is handled as a CONTINUEB
12439  *    signal in DBTUP that will call setup_scan_pointers in DBLQH.
12440  * 7) During a copy fragment scan we will return with a LQHKEYCONF
12441  *    or LQHKEYREF to continue scanning. We will setup the
12442  *    infrastructure to continue the scan just before calling
12443  *    nextRecordCopy from copyCompletedLab. Most of the time we will
12444  *    not continue the scan from such a signal, so we postpone it
12445  *    as long as we can.
12446  * 8) After having our Copy fragment scan halted we return to
12447  *    executing the copy fragment scan when executing the
12448  *    execRESUME_COPY_FRAG_REQ method.
12449  * 9) As part of closing down a scan process we can send a signal
12450  *    ACC_ABORTCONF to ensure that all ACCKEYCONF and ACCKEYREF
12451  *    have been received before we close the scan. This is received
12452  *    in both DBTUP and DBTUX.
12453  * 10)We also setup scan pointers when executing LQH_TRANSREQ to
12454  *    ensure that we have always setup those pointers properly in
12455  *    all paths.
12456  * 11)We take a real-time break also when restarting a scan operation
12457  *    that has been released from the queue. This ensures that we
12458  *    never execute multiple scans in the same real-time break. This
12459  *    would complicate the handling of pointers considerably.
12460  *    We handle this special case by sending a CONTINUEB to ourselves
12461  *    and checking that no one completed the scan while we were
12462  *    in the job buffer.
12463  * 12) During scan in disk page order we can end up in
12464  *     disk_page_tup_scan_callback after retrieving the disk page.
12465  */
setup_key_pointers(Uint32 tcIndex)12466 void Dblqh::setup_key_pointers(Uint32 tcIndex)
12467 {
12468   /**
12469    * We come here after a real-time break for a key operation.
12470    * At this point we need to setup the pointers in preparation
12471    * for calling TUP and ACC and the pointers internally in LQH.
12472    */
12473   jamDebug();
12474   TcConnectionrecPtr tcConnectptr;
12475   FragrecordPtr fragPtr;
12476   tcConnectptr.i = tcIndex;
12477   ndbrequire(tcConnect_pool.getUncheckedPtrRW(tcConnectptr));
12478   c_tup->prepare_op_pointer(tcConnectptr.p->tupConnectrec,
12479                             tcConnectptr.p->tupConnectPtrP);
12480   c_fragment_pool.getPtr(fragPtr, tcConnectptr.p->fragmentptr);
12481   fragptr = fragPtr;
12482   m_tc_connect_ptr = tcConnectptr;
12483   ndbrequire(Magic::check_ptr(tcConnectptr.p));
12484   ndbrequire(Magic::check_ptr(tcConnectptr.p->tupConnectPtrP));
12485   ndbrequire(Magic::check_ptr(tcConnectptr.p->accConnectPtrP));
12486 }
12487 
12488 /**
12489  * After a real-time break it is necessary to setup the scan context
12490  * to avoid having to recompute these variables every time we need them.
12491  *
12492  * At reception of SCAN_FRAGREQ the scan context is setup bit by bit
12493  * rather than in one method since we don't have access to all variables
12494  * at start of the SCAN_FRAGREQ.
12495  *
12496  * The following variables are computed:
12497  *
12498  * scanptr
12499  *   This is the pointer and i-value stored in LQH block object for the scan
12500  *   record.
12501  * fragptr
12502  *   This is the pointer and i-value stored in LQH block object for the index
12503  *   fragment being scanned.
12504  * prim_tab_fragptr
12505  *   This is the pointer and i-value stored in LQH block object for the table
12506  *   fragment being scanned.
12507  * m_tc_connect_ptr
12508  *   This is the pointer and i-value of the TC connect record used by the scan
12509  *   record. There is always one TC connect record attached to each active scan
12510  *   record.
12511  *
12512  * TUP variables setup by prepare_op_pointer:
12513  * ..........................................
12514  *
12515  * prepare_op_pointer
12516  *   The pointer and i-value of the operation record that is connected to
12517  *   TC connect record in LQH.
12518  *
12519  * TUP variables setup by prepare_tab_pointers:
12520  * ............................................
12521  *
12522  * prepare_fragptr
12523  *   The pointer and i-value of the table fragment record used by the scan.
12524  * prepare_tabptr
12525  *   The pointer and i-value of the table record used by the scan.
12526  *
12527  * TUP variable setup by copyAttrinfo:
12528  * ...................................
12529  * The Attrinfo that contains the scan stored procedure is an Attrinfo program.
12530  * It is stored in signal segments while being executed. When TUP executes the
12531  * scan the Attrinfo program must be moved to a linear array. This linear array
12532  * is the cinBuffer variable in TUP. Using a linear array simplifies the code in
12533  * TUP greatly. Not having to copy this for each row we scan saves a lot of
12534  * computation.
12535  *
12536  * The method prepare_scan_ctx calls a method in ACC, TUP or TUX. This method sets
12537  * up the scan context for the block where the scan is performed. ACC is used for
12538  * full table scans. TUP is used for LCP scans and Node Restart scans. TUP can also
12539  * be called for full table scans from the NDB API (but not from SQL). TUX is used
12540  * for all range scans and represents the majority of the scans performed.
12541  * These methods describe what context they setup.
12542  * TUX scan context setup is described in DbtuxScan.cpp.
12543  *
12544  * Additional TUP variables to setup before calling execTUPKEYREQ
12545  * --------------------------------------------------------------
12546  * execTUPKEYREQ is used to read, update, delete, insert and read for scan rows
12547  * in TUP.
12548  * Before we can call this method we must also setup a few additional variables:
12549  *
12550  * prepare_pageptr
12551  *   This is the pointer and i-value of the page where the fixed part of the row
12552  *   resides.
12553  * prepare_tuple_ptr
12554  *   This is a pointer to the start of the tuple in the fixed part of the row.
12555  * prepare_page_no
12556  *   This is the physical page number of the fixed size page of the row.
12557  *
12558  * These are setup differently dependent on from where we come:
12559  * prepareTUPKEYREQ
12560  * ................
12561  *   This method is called after a real-time break for key operations. It also sets
12562  *   up prepare_tabptr and prepare_fragptr.
12563  * prepare_scanTUPKEYREQ
12564  * .....................
12565  *   This method is used when preparing to call execTUPKEYREQ from scans in TUP and
12566  *   ACC. This method gets a page id that is a logical page id since TUP has the
12567  *   logical page id since it scans in row id order and ACC stores the logical page
12568  *   id of the row. TUX stores the physical page id of the row instead to speed up
12569  *   things since it only requires the physical page id when reading the row.
12570  * prepare_scan_tux_TUPKEYREQ
12571  * ..........................
12572  *   Used when preparing to call execTUPKEYREQ from TUX scans.
12573  *
12574  * All these methods also try to prefetch the fixed part of the row to avoid cache
12575  * miss waits.
12576  */
setup_scan_pointers_from_tc_con(TcConnectionrecPtr tcConnectptr)12577 void Dblqh::setup_scan_pointers_from_tc_con(TcConnectionrecPtr tcConnectptr)
12578 {
12579   /**
12580    * We come here after a real-time break, we need to setup
12581    * infrastructure in TUP and here in LQH for execution of
12582    * direct signals.
12583    */
12584   jamDebug();
12585   FragrecordPtr loc_fragptr;
12586   FragrecordPtr loc_prim_tab_fragptr;
12587   ScanRecordPtr loc_scanptr;
12588   loc_scanptr.i = tcConnectptr.p->tcScanRec;
12589   loc_fragptr.i = tcConnectptr.p->fragmentptr;
12590   ndbrequire(c_scanRecordPool.getUncheckedPtrRW(loc_scanptr));
12591   loc_prim_tab_fragptr.i = loc_scanptr.p->fragPtrI;
12592   c_fragment_pool.getPtr(loc_fragptr);
12593   m_scan_direct_count = 1;
12594   m_tot_scan_direct_count = 0;
12595   SimulatedBlock *block = loc_scanptr.p->scanBlock;
12596   scanptr = loc_scanptr;
12597   fragptr = loc_fragptr;
12598   c_fragment_pool.getPtr(loc_prim_tab_fragptr);
12599   c_tup->prepare_op_pointer(tcConnectptr.p->tupConnectrec,
12600                             tcConnectptr.p->tupConnectPtrP);
12601   m_tc_connect_ptr = tcConnectptr;
12602   prim_tab_fragptr = loc_prim_tab_fragptr;
12603   c_tup->prepare_tab_pointers(loc_prim_tab_fragptr.p->tupFragptr);
12604   if (likely(loc_scanptr.p->scanStoredProcId != RNIL))
12605   {
12606     jamDebug();
12607     Uint32 storedProcLen =
12608       c_tup->copyAttrinfo(loc_scanptr.p->scanStoredProcId);
12609     (void)storedProcLen;
12610     ndbassert(loc_scanptr.p->scanAiLength == storedProcLen);
12611   }
12612   if (likely(loc_scanptr.p->scanAccPtr != RNIL))
12613   {
12614     jamDebug();
12615     block->prepare_scan_ctx(loc_scanptr.p->scanAccPtr);
12616   }
12617   ndbrequire(Magic::check_ptr(loc_scanptr.p));
12618   ndbrequire(Magic::check_ptr(tcConnectptr.p->tupConnectPtrP));
12619   ndbrequire(Magic::check_ptr(tcConnectptr.p->accConnectPtrP));
12620 }
12621 
setup_scan_pointers(Uint32 scanPtrI)12622 void Dblqh::setup_scan_pointers(Uint32 scanPtrI)
12623 {
12624   /**
12625    * We come here after a real-time break, we need to setup
12626    * infrastructure in TUP and here in LQH for execution of
12627    * direct signals.
12628    */
12629   jamDebug();
12630   FragrecordPtr loc_fragptr;
12631   FragrecordPtr loc_prim_tab_fragptr;
12632   ScanRecordPtr loc_scanptr;
12633   TcConnectionrecPtr loc_tcConnectptr;
12634   loc_scanptr.i = scanPtrI;
12635   ndbrequire(c_scanRecordPool.getUncheckedPtrRW(loc_scanptr));
12636   loc_tcConnectptr.i = loc_scanptr.p->scanTcrec;
12637   loc_prim_tab_fragptr.i = loc_scanptr.p->fragPtrI;
12638   ndbrequire(tcConnect_pool.getUncheckedPtrRW(loc_tcConnectptr));
12639   loc_fragptr.i = loc_tcConnectptr.p->fragmentptr;
12640   c_fragment_pool.getPtr(loc_prim_tab_fragptr);
12641   c_fragment_pool.getPtr(loc_fragptr);
12642   m_scan_direct_count = 1;
12643   m_tot_scan_direct_count = 0;
12644   prim_tab_fragptr = loc_prim_tab_fragptr;
12645   scanptr = loc_scanptr;
12646   SimulatedBlock *block = loc_scanptr.p->scanBlock;
12647   c_tup->prepare_op_pointer(loc_tcConnectptr.p->tupConnectrec,
12648                             loc_tcConnectptr.p->tupConnectPtrP);
12649   fragptr = loc_fragptr;
12650   m_tc_connect_ptr = loc_tcConnectptr;
12651   c_tup->prepare_tab_pointers(loc_prim_tab_fragptr.p->tupFragptr);
12652   if (likely(loc_scanptr.p->scanStoredProcId != RNIL))
12653   {
12654     jamDebug();
12655     Uint32 storedProcLen =
12656       c_tup->copyAttrinfo(loc_scanptr.p->scanStoredProcId);
12657     (void)storedProcLen;
12658     ndbassert(loc_scanptr.p->scanAiLength == storedProcLen);
12659   }
12660   if (likely(loc_scanptr.p->scanAccPtr != RNIL))
12661   {
12662     jamDebug();
12663     block->prepare_scan_ctx(loc_scanptr.p->scanAccPtr);
12664   }
12665   ndbrequire(Magic::check_ptr(loc_scanptr.p));
12666   ndbrequire(Magic::check_ptr(loc_tcConnectptr.p));
12667   ndbrequire(Magic::check_ptr(loc_tcConnectptr.p->tupConnectPtrP));
12668   ndbrequire(Magic::check_ptr(loc_tcConnectptr.p->accConnectPtrP));
12669 }
12670 
checkLcpStopBlockedLab(Signal * signal,Uint32 scanPtrI)12671 void Dblqh::checkLcpStopBlockedLab(Signal* signal, Uint32 scanPtrI)
12672 {
12673   /**
12674    * We are back from a real-time break, we need to
12675    * setup the pointer infrastructure before starting
12676    * off the execution.
12677    */
12678   setup_scan_pointers(scanPtrI);
12679   Fragrecord::FragStatus fragstatus = fragptr.p->fragStatus;
12680   ScanRecord * const scanPtr = scanptr.p;
12681   BlockReference blockRef = scanPtr->scanBlockref;
12682   scanPtr->scan_lastSeen = __LINE__;
12683   signal->theData[0] = scanPtr->scanAccPtr;
12684   signal->theData[1] = AccCheckScan::ZNOT_CHECK_LCP_STOP;
12685   ndbrequire(is_scan_ok(scanPtr, fragstatus));
12686   EXECUTE_DIRECT(refToMain(blockRef),
12687                  GSN_ACC_CHECK_SCAN,
12688                  signal,
12689                  2);
12690 }//Dblqh::checkLcpStopBlockedLab()
12691 
execACC_CHECK_SCAN(Signal * signal)12692 void Dblqh::execACC_CHECK_SCAN(Signal *signal)
12693 {
12694   /**
12695    * A real-time break was invoked in the middle of the scan
12696    * processing.
12697    *
12698    * This could either be invoked by DBACC due to some lock
12699    * wait issue or it could be a normal scheduling break
12700    * invoked since we have executed too many signals in a
12701    * row. In this case it is the NEXT_SCANREQ signal that
12702    * is sent.
12703    */
12704   jamDebug();
12705   Uint32 scanPtrI = signal->theData[0];
12706   setup_scan_pointers(scanPtrI);
12707   Uint32 sig_number = signal->theData[1];
12708   ScanRecord *scanPtr = scanptr.p;
12709   BlockReference ref = scanPtr->scanBlockref;
12710   if (sig_number == GSN_NEXT_SCANREQ)
12711   {
12712     jamDebug();
12713     signal->theData[0] = scanPtr->scanAccPtr;
12714     signal->theData[1] = signal->theData[2];
12715     signal->theData[2] = signal->theData[3];
12716     EXECUTE_DIRECT(refToMain(ref),
12717                    GSN_NEXT_SCANREQ,
12718                    signal,
12719                    3);
12720   }
12721   else
12722   {
12723     jamDebug();
12724     ndbrequire(sig_number == GSN_ACC_CHECK_SCAN);
12725     signal->theData[0] = scanPtr->scanAccPtr;
12726     signal->theData[1] = signal->theData[2];
12727     EXECUTE_DIRECT(refToMain(ref),
12728                    GSN_ACC_CHECK_SCAN,
12729                    signal,
12730                    2);
12731   }
12732 }
12733 
12734 /* ***************>> */
12735 /*  NEXT_SCANCONF  > */
12736 /* ***************>> */
execNEXT_SCANCONF(Signal * signal)12737 void Dblqh::execNEXT_SCANCONF(Signal* signal)
12738 {
12739   /**
12740    * The scan block sent an asynchronous signal, when this arrives
12741    * we have had a real-time break, so we need to setup the scan
12742    * execution environment again.
12743    */
12744   NextScanConf * const nextScanConf = (NextScanConf *)&signal->theData[0];
12745   setup_scan_pointers(nextScanConf->scanPtr);
12746   const Uint32 pageNo = nextScanConf->localKey[0];
12747   const Uint32 pageIdx = nextScanConf->localKey[1];
12748   jamEntryDebug();
12749 
12750   ScanRecord * const scanPtr = scanptr.p;
12751   scanPtr->m_row_id.m_page_idx = pageIdx;
12752   scanPtr->m_row_id.m_page_no = pageNo;
12753   continue_next_scan_conf(signal,
12754                           scanPtr->scanState,
12755                           scanPtr);
12756 }
12757 
exec_next_scan_conf(Signal * signal)12758 void Dblqh::exec_next_scan_conf(Signal *signal)
12759 {
12760   /**
12761    * The scan block sent an immediate signal requiring no
12762    * real-time break.
12763    */
12764   jamDebug();
12765   NextScanConf * const nextScanConf = (NextScanConf *)&signal->theData[0];
12766   ScanRecord * const scanPtr = scanptr.p;
12767   const Uint32 pageNo = nextScanConf->localKey[0];
12768   const Uint32 pageIdx = nextScanConf->localKey[1];
12769   /**
12770    * The local key is a row id when scanning ACC and TUP. In TUX we store
12771    * the physical row id and we don't need the row id for anything in an
12772    * ordered index scan. So in the case of a TUX scan we will return a
12773    * physical row id and not the logical row id.
12774    */
12775   scanPtr->m_row_id.m_page_idx = pageIdx;
12776   scanPtr->m_row_id.m_page_no = pageNo;
12777   continue_next_scan_conf(signal,
12778                           scanPtr->scanState,
12779                           scanPtr);
12780 }
12781 
continue_next_scan_conf(Signal * signal,ScanRecord::ScanState scanState,ScanRecord * const scanPtr)12782 void Dblqh::continue_next_scan_conf(Signal *signal,
12783                                     ScanRecord::ScanState scanState,
12784                                     ScanRecord * const scanPtr)
12785 {
12786 #ifdef VM_TRACE
12787   NextScanConf * const nextScanConf = (NextScanConf *)&signal->theData[0];
12788   if (signal->getLength() > 2 && nextScanConf->accOperationPtr != RNIL)
12789   {
12790     Ptr<TcConnectionrec> regTcPtr;
12791     regTcPtr.i = scanPtr->scanTcrec;
12792     ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
12793     ndbassert(regTcPtr.p->fragmentid == nextScanConf->fragId);
12794   }
12795 #endif
12796   switch (scanState) {
12797   case ScanRecord::WAIT_NEXT_SCAN:
12798   {
12799     jamDebug();
12800     NextScanConf * const nextScanConf = (NextScanConf *)&signal->theData[0];
12801     nextScanConfScanLab(signal,
12802                         scanPtr,
12803                         nextScanConf->fragId,
12804                         nextScanConf->accOperationPtr,
12805                         m_tc_connect_ptr);
12806     return;
12807   }
12808   case ScanRecord::WAIT_NEXT_SCAN_COPY:
12809     jamDebug();
12810     nextScanConfCopyLab(signal, m_tc_connect_ptr);
12811     return;
12812   case ScanRecord::WAIT_CLOSE_SCAN:
12813     jamDebug();
12814     accScanCloseConfLab(signal, m_tc_connect_ptr);
12815     return;
12816   case ScanRecord::WAIT_CLOSE_COPY:
12817     jamDebug();
12818     accCopyCloseConfLab(signal, m_tc_connect_ptr);
12819     return;
12820   default:
12821     jamLine(scanPtr->scanState);
12822     ndbabort();
12823   }//switch
12824 }
12825 
12826 /* --------------------------------------------------------------------------
12827  *       ENTER SCAN_NEXTREQ
12828  * --------------------------------------------------------------------------
12829  *       PRECONDITION:
12830  *       TRANSACTION_STATE = SCAN_STATE
12831  *       SCAN_STATE = WAIT_SCAN_NEXTREQ
12832  *
12833  * Case scanLockHold: ZTRUE  = Unlock previous round of
12834  *                             scanned row(s) and fetch next set of rows.
12835  *                    ZFALSE = Fetch new set of rows.
12836  * Number of rows to read depends on parallelism and how many rows
12837  * left to scan in the fragment. SCAN_NEXTREQ can also be sent with
12838  * closeFlag == ZTRUE to close the scan.
12839  * ------------------------------------------------------------------------- */
execSCAN_NEXTREQ(Signal * signal)12840 void Dblqh::execSCAN_NEXTREQ(Signal* signal)
12841 {
12842   jamEntry();
12843   const ScanFragNextReq * const nextReq =
12844                                 (ScanFragNextReq*)&signal->theData[0];
12845   const Uint32 transid1 = nextReq->transId1;
12846   const Uint32 transid2 = nextReq->transId2;
12847   const Uint32 senderData = nextReq->senderData;
12848   Uint32 hashHi = signal->getSendersBlockRef();
12849   // bug#13834481 hashHi!=0 caused timeout (tx not found)
12850 
12851   TcConnectionrecPtr tcConnectptr;
12852   if (unlikely(findTransaction(transid1,
12853                                transid2,
12854                                senderData,
12855                                hashHi,
12856                                tcConnectptr) != ZOK))
12857   {
12858     jam();
12859     LQH_DEBUG(senderData <<
12860 	  " Received SCAN_NEXTREQ in LQH with close flag when closed");
12861     ndbrequire(ScanFragNextReq::getCloseFlag(nextReq->requestInfo));
12862     return;
12863   }
12864 
12865   // Crash node if signal sender is same node
12866   CRASH_INSERTION2(5021, refToNode(signal->senderBlockRef()) == cownNodeid);
12867   // Crash node if signal sender is NOT same node
12868   CRASH_INSERTION2(5022, refToNode(signal->senderBlockRef()) != cownNodeid);
12869 
12870   if (ERROR_INSERTED(5023)){
12871     // Drop signal if sender is same node
12872     if (refToNode(signal->senderBlockRef()) == cownNodeid &&
12873         refToBlock(signal->senderBlockRef()) != BACKUP)
12874     {
12875       CLEAR_ERROR_INSERT_VALUE;
12876       return;
12877     }
12878   }//if
12879   if (ERROR_INSERTED(5024)){
12880     // Drop signal if sender is NOT same node
12881     if (refToNode(signal->senderBlockRef()) != cownNodeid) {
12882       CLEAR_ERROR_INSERT_VALUE;
12883       return;
12884     }
12885   }//if
12886   if (ERROR_INSERTED(5025))
12887   {
12888     /**
12889      * This does not work as signal->getSendersBlockRef() is used
12890      *   as "hashHi"...not having a real data-word for this is not optimal
12891      *   but it will work...summary: disable this ERROR_INSERT
12892      */
12893     CLEAR_ERROR_INSERT_VALUE;
12894   }
12895 
12896   if (ERROR_INSERTED(5030)){
12897     if (refToBlock(signal->senderBlockRef()) != BACKUP)
12898     {
12899       ndbout << "ERROR 5030" << endl;
12900       CLEAR_ERROR_INSERT_VALUE;
12901       // Drop signal
12902       return;
12903     }
12904   }//if
12905 
12906   Uint32 pos = 0;
12907   if (ScanFragNextReq::getCorrFactorFlag(nextReq->requestInfo))
12908   {
12909     jamDebug();
12910     Uint32 corrFactorLo = nextReq->variableData[pos++];
12911     tcConnectptr.p->m_corrFactorLo &= 0xFFFF0000;
12912     tcConnectptr.p->m_corrFactorLo |= corrFactorLo;
12913   }
12914 
12915   setup_scan_pointers_from_tc_con(tcConnectptr);
12916   scanptr.p->scanTcWaiting = cLqhTimeOutCount;
12917   /* ------------------------------------------------------------------
12918    * If close flag is set this scan should be closed
12919    * If we are waiting for SCAN_NEXTREQ set flag to stop scanning and
12920    * continue execution else set flags and wait until the scan
12921    * completes itself
12922    * ------------------------------------------------------------------ */
12923   if (ScanFragNextReq::getCloseFlag(nextReq->requestInfo))
12924   {
12925     jamDebug();
12926     if(ERROR_INSERTED(5034)){
12927       CLEAR_ERROR_INSERT_VALUE;
12928     }
12929     closeScanRequestLab(signal, tcConnectptr, true);
12930     return;
12931   }//if
12932   scanptr.p->prioAFlag = ScanFragNextReq::getPrioAFlag(nextReq->requestInfo);
12933   scanptr.p->m_exec_direct_batch_size_words = 0;
12934 
12935   ScanRecord * const scanPtr = scanptr.p;
12936   const Uint32 max_rows = nextReq->batch_size_rows;
12937   const Uint32 max_bytes = nextReq->batch_size_bytes;
12938   scanPtr->m_max_batch_size_bytes = max_bytes;
12939   {
12940     /**
12941      * To speed up drop table we check for table being dropped here.
12942      * This can speed up drop table by minutes, so even though it is
12943      * a small cost at every scan batch, it will provide us a much
12944      * more reliable time of execution for drop tables. Drop table
12945      * can also be delayed by user transactions. So we could potentially
12946      * make this check for all scan types. This will however require
12947      * user transactions to add checks for this error, this they should
12948      * however already have since it is checked at first SCAN_FRAGREQ.
12949      *
12950      * We don't need to worry about backups since they will take a lock
12951      * before they start, so this won't happen to a backup. No drop tables
12952      * can run concurrently with drop table. Also any ALTER TABLE activity
12953      * cannot run at the same time as a drop table, so this is also safe
12954      * for all sorts of table reorg scans. What remains is node recovery
12955      * scans to synchronize data.
12956      */
12957     TablerecPtr tabPtr;
12958     tabPtr.i = tcConnectptr.p->tableref;
12959     ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
12960     if (unlikely(tabPtr.p->tableStatus != Tablerec::TABLE_DEFINED &&
12961                  tabPtr.p->tableStatus != Tablerec::TABLE_READ_ONLY))
12962     {
12963       tcConnectptr.p->errorCode = get_table_state_error(tabPtr);
12964       closeScanRequestLab(signal, tcConnectptr, true);
12965       return;
12966     }
12967   }
12968   if (unlikely(max_rows > scanPtr->m_max_batch_size_rows))
12969   {
12970     jam();
12971     /**
12972      * Extend list...
12973      * Will never happen for LCP, Backup and NR.
12974      */
12975     ndbrequire(scanPtr->m_reserved == 0);
12976     if (unlikely(!seize_acc_ptr_list(scanPtr,
12977                             scanPtr->m_max_batch_size_rows, max_rows)))
12978     {
12979       jam();
12980       tcConnectptr.p->errorCode = ScanFragRef::ZTOO_MANY_ACTIVE_SCAN_ERROR;
12981       closeScanRequestLab(signal, tcConnectptr, true);
12982       return;
12983     }
12984     scanPtr->m_max_batch_size_rows = max_rows;
12985   }
12986   else if (unlikely(max_rows < scanPtr->m_max_batch_size_rows))
12987   {
12988     jam();
12989     scanPtr->m_max_batch_size_rows = max_rows;
12990   }
12991 
12992   /* --------------------------------------------------------------------
12993    * If scanLockHold = TRUE we need to unlock previous round of
12994    * scanned records.
12995    * scanReleaseLocks will set states for this and send a NEXT_SCANREQ.
12996    * When confirm signal NEXT_SCANCONF arrives we call
12997    * continueScanNextReqLab to continue scanning new rows and
12998    * acquiring new locks.
12999    * -------------------------------------------------------------------- */
13000   if (unlikely((scanPtr->scanLockHold == ZTRUE) &&
13001                (scanPtr->m_curr_batch_size_rows > 0)))
13002   {
13003     jam();
13004     scanPtr->scanReleaseCounter = 1;
13005     scanReleaseLocksLab(signal, tcConnectptr.p);
13006     return;
13007   }//if
13008 
13009   /* -----------------------------------------------------------------------
13010    * We end up here when scanLockHold = FALSE or no rows was locked from
13011    * previous round.
13012    * Simply continue scanning.
13013    * ----------------------------------------------------------------------- */
13014   continueScanNextReqLab(signal, tcConnectptr.p);
13015 }//Dblqh::execSCAN_NEXTREQ()
13016 
continueScanNextReqLab(Signal * signal,TcConnectionrec * const regTcPtr)13017 void Dblqh::continueScanNextReqLab(Signal* signal,
13018                                    TcConnectionrec* const regTcPtr)
13019 {
13020   ScanRecord * const scanPtr = scanptr.p;
13021   if (unlikely(scanPtr->scanCompletedStatus == ZTRUE))
13022   {
13023     jam();
13024     closeScanLab(signal, regTcPtr);
13025     return;
13026   }//if
13027 
13028   if(scanPtr->m_last_row)
13029   {
13030     jamDebug();
13031     scanPtr->scanCompletedStatus = ZTRUE;
13032     scanPtr->scanState = ScanRecord::WAIT_SCAN_NEXTREQ;
13033     scanPtr->scan_lastSeen = __LINE__;
13034     sendScanFragConf(signal, ZFALSE, regTcPtr);
13035     return;
13036   }
13037 
13038   // Update timer on tcConnectRecord
13039   regTcPtr->tcTimer = cLqhTimeOutCount;
13040   init_acc_ptr_list(scanPtr);
13041   scanPtr->scanFlag = NextScanReq::ZSCAN_NEXT;
13042   scanNextLoopLab(signal,
13043                   regTcPtr->clientConnectrec,
13044                   RNIL,
13045                   scanPtr,
13046                   fragptr.p);
13047 }//Dblqh::continueScanNextReqLab()
13048 
scanNextLoopLab(Signal * signal,Uint32 clientPtrI,Uint32 accOpPtr,ScanRecord * const scanPtr,Fragrecord * const fragPtr)13049 void Dblqh::scanNextLoopLab(Signal* signal,
13050                             Uint32 clientPtrI,
13051                             Uint32 accOpPtr,
13052                             ScanRecord * const scanPtr,
13053                             Fragrecord * const fragPtr)
13054 {
13055   Fragrecord::FragStatus fragstatus = fragPtr->fragStatus;
13056   Uint32 scanFlag = scanPtr->scanFlag;
13057   const Uint32 sig0 = scanPtr->scanAccPtr;
13058   SimulatedBlock *block = scanPtr->scanBlock;
13059   ExecFunction f = scanPtr->scanFunction_NEXT_SCANREQ;
13060   Uint32 in_send_next_scan = m_in_send_next_scan;
13061 
13062   signal->theData[0] = sig0;
13063   signal->theData[1] = accOpPtr;
13064   signal->theData[2] = scanFlag;
13065 
13066   ndbrequire(is_scan_ok(scanPtr, fragstatus));
13067   scanPtr->scanState = ScanRecord::WAIT_NEXT_SCAN;
13068   scanPtr->scan_lastSeen = __LINE__;
13069   if (unlikely(in_send_next_scan == 0))
13070   {
13071     send_next_NEXT_SCANREQ(signal,
13072                            block,
13073                            f,
13074                            scanPtr,
13075                            clientPtrI);
13076     return;
13077   }
13078   /**
13079    * At this point we don't call send_next_NEXT_SCANREQ since we
13080    * want to unwind the call stack before entering this function.
13081    * m_in_send_next_scan equal to 1 indicates that we are
13082    * executing this function already and that we will check the
13083    * result of this function when we return to send_next_NEXT_SCANREQ
13084    * after unwinding the stack.
13085    *
13086    * It is imperative that we return immediately in all the call
13087    * stack until we return to send_next_NEXT_SCANREQ again. This
13088    * secures the signal object that is setup already to send the
13089    * NEXT_SCANREQ signal.
13090    *
13091    * We indicate that we have another signal to process by setting
13092    * m_in_send_next_scan to 2.
13093    */
13094   ndbassert(in_send_next_scan == 1);
13095   m_in_send_next_scan = 2;
13096 }//Dblqh::scanNextLoopLab()
13097 
scanLockReleasedLab(Signal * signal,TcConnectionrec * const regTcPtr)13098 void Dblqh::scanLockReleasedLab(Signal* signal,
13099                                 TcConnectionrec* const regTcPtr)
13100 {
13101   ScanRecord * const scanPtr = scanptr.p;
13102   if (scanPtr->scanReleaseCounter == scanPtr->m_curr_batch_size_rows) {
13103     if ((scanPtr->scanErrorCounter > 0) ||
13104         (scanPtr->scanCompletedStatus == ZTRUE)) {
13105       jam();
13106       scanPtr->m_curr_batch_size_rows = 0;
13107       scanPtr->m_curr_batch_size_bytes = 0;
13108       closeScanLab(signal, regTcPtr);
13109     } else if (scanPtr->m_last_row && !scanPtr->scanLockHold) {
13110       jam();
13111       closeScanLab(signal, regTcPtr);
13112     } else if (scanPtr->check_scan_batch_completed() &&
13113                scanPtr->scanLockHold != ZTRUE) {
13114       jam();
13115       scanPtr->scanState = ScanRecord::WAIT_SCAN_NEXTREQ;
13116       scanPtr->scan_lastSeen = __LINE__;
13117       sendScanFragConf(signal, ZFALSE, regTcPtr);
13118     } else {
13119       jam();
13120       /*
13121        * We came here after releasing locks after
13122        * receiving SCAN_NEXTREQ from TC. We only come here
13123        * when scanHoldLock == ZTRUE
13124        */
13125       scanPtr->m_curr_batch_size_rows = 0;
13126       scanPtr->m_curr_batch_size_bytes = 0;
13127       continueScanNextReqLab(signal, regTcPtr);
13128       return;
13129     }//if
13130   }
13131   else if (scanPtr->scanCompletedStatus != ZTRUE)
13132   {
13133     jam();
13134     /*
13135     We come here when we have been scanning for a long time and not been able
13136     to find m_max_batch_size_rows records to return. We needed to release
13137     the record we didn't want, but now we are returning all found records to
13138     the API.
13139     */
13140     scanPtr->scanState = ScanRecord::WAIT_SCAN_NEXTREQ;
13141     scanPtr->scan_lastSeen = __LINE__;
13142     sendScanFragConf(signal, ZFALSE, regTcPtr);
13143   }
13144   else
13145   {
13146     jam();
13147     closeScanLab(signal, regTcPtr);
13148   }
13149   return;
13150 }//Dblqh::scanLockReleasedLab()
13151 
13152 /* -------------------------------------------------------------------------
13153  *       WE NEED TO RELEASE LOCKS BEFORE CONTINUING
13154  * ------------------------------------------------------------------------- */
scanReleaseLocksLab(Signal * signal,TcConnectionrec * const regTcPtr)13155 void Dblqh::scanReleaseLocksLab(Signal* signal,
13156                                 TcConnectionrec* const regTcPtr)
13157 {
13158   ScanRecord * const scanPtr = scanptr.p;
13159   Fragrecord::FragStatus fragstatus = fragptr.p->fragStatus;
13160   ndbrequire(is_scan_ok(scanPtr, fragstatus));
13161   check_send_scan_hb_rep(signal, scanPtr, regTcPtr);
13162   while (true)
13163   {
13164     const Uint32 sig1 =
13165       get_acc_ptr_from_scan_record(scanPtr,
13166                                    scanPtr->scanReleaseCounter-1,
13167                                    false);
13168     const Uint32 sig0 = scanPtr->scanAccPtr;
13169     SimulatedBlock *block = scanPtr->scanBlock;
13170     ExecFunction f = scanPtr->scanFunction_NEXT_SCANREQ;
13171 
13172     signal->theData[1] = sig1;
13173     signal->theData[2] = NextScanReq::ZSCAN_COMMIT;
13174     signal->theData[0] = sig0;
13175     /* EXECUTE_DIRECT optimised to NEXT_SCANREQ in TUP/ACC/TUX */
13176 
13177     /**
13178      * DESIGN PATTERN DESCRIPTION:
13179      * ---------------------------
13180      * When calling another block immediately there is a set of ways to
13181      * do this.
13182      *
13183      * Standard, non-optimised manner:
13184      * Use EXECUTE_DIRECT with four parameters that specify block reference
13185      * of receiver, the signal object, the global signal number and the
13186      * length of the signal (There is also a variant used when sending such
13187      * a signal to a different instance). This method is fairly optimised
13188      * but has quite a lot of potential for performance improvement.
13189      *
13190      * Standard, optimised manner:
13191      * In this case we optimise things by translating to block object and
13192      * retrieving the function pointer in the block call. This gives
13193      * the compiler assistance to separate the loads and stores more from
13194      * each other.
13195      * The call will be seen as:
13196      * block->EXECUTE_DIRECT(signal, f);
13197      *
13198      * This manner optimises the code but retains the flexibility and also
13199      * the possibility to trace signal execution between blocks.
13200      *
13201      * Non-standard, optimised manner:
13202      * In this case we remove some of the flexibility of the call to enhance
13203      * the performance yet a bit more. In this case we remove the possibility
13204      * for a flexible receiver of the signal, it is directed to a certain
13205      * block and we also call the method directly without any indirection.
13206      * The call will be seen as e.g.:
13207      * c_tup->execTUPKEYREQ(signal);
13208      *
13209      * The standard manner of calling EXECUTE_DIRECT are both always calling
13210      * functions with one parameter being the signal object and no return
13211      * value. There is however two ways of sending signals back. In some
13212      * cases a signal is always sent back when returning from the
13213      * EXECUTE_DIRECT call, in this case the signal object returned contains
13214      * a signal object with data for the return signal. In some cases
13215      * one gets a return signal in this manner by a combination of the signal
13216      * number and the parameters. In the example below with NEXT_SCANREQ
13217      * we always gets a return signal when specifying ZSCAN_COMMIT
13218      * but not in other cases.
13219      *
13220      * The other manner of sending a return signal is to perform a new
13221      * EXECUTE_DIRECT signal. In those cases one needs to ensure that the
13222      * call chain is bounded to not run out of stack. It is also a good
13223      * idea to try and ensure that the EXECUTE_DIRECT can use the
13224      * tail-call optimisation to avoid using too much stack which is bad
13225      * for CPU caching. This means returning immediately after
13226      * EXECUTE_DIRECT but also avoiding having objects that needs
13227      * destruction after return and also avoiding taking the reference of
13228      * stack variables.
13229      *
13230      * Using the non-standard manner one can obviously also change more
13231      * ways, one can return a bool for example as in the example with
13232      * execTUPKEYREQ, one can add parameters and one can even change the
13233      * name to a different name not according to the standard naming
13234      * conventions. Obviously doing this removes flexibility of using
13235      * blocks in a flexible manner.
13236      */
13237     block->EXECUTE_DIRECT_FN(f, signal);
13238     ndbrequire(signal->theData[0] == 0); /* Failure is not an option */
13239     if (scanPtr->scanReleaseCounter < scanPtr->m_curr_batch_size_rows)
13240     {
13241       jam();
13242       scanPtr->scanReleaseCounter++;
13243       /* Continue looping */
13244     }
13245     else
13246     {
13247       scanLockReleasedLab(signal, regTcPtr);
13248       /* No more records to commit for this scan at this time */
13249       return;
13250     }
13251   }
13252 }//Dblqh::scanReleaseLocksLab()
13253 
13254 /* -------------------------------------------------------------------------
13255  *       ENTER SCAN_NEXTREQ
13256  * -------------------------------------------------------------------------
13257  *       SCAN_NEXT_REQ SIGNAL ARRIVED IN THE MIDDLE OF EXECUTION OF THE SCAN.
13258  *       IT WAS A REQUEST TO CLOSE THE SCAN. WE WILL CLOSE THE SCAN IN A
13259  *       CAREFUL MANNER TO ENSURE THAT NO ERROR OCCURS.
13260  * -------------------------------------------------------------------------
13261  *       PRECONDITION:
13262  *       TRANSACTION_STATE = SCAN_STATE_USED
13263  *       TSCAN_COMPLETED = ZTRUE
13264  * -------------------------------------------------------------------------
13265  *       WE CAN ALSO ARRIVE AT THIS LABEL AFTER A NODE CRASH OF THE SCAN
13266  *       COORDINATOR.
13267  * ------------------------------------------------------------------------- */
closeScanRequestLab(Signal * signal,const TcConnectionrecPtr tcConnectptr,bool setup)13268 void Dblqh::closeScanRequestLab(Signal* signal,
13269                                 const TcConnectionrecPtr tcConnectptr,
13270                                 bool setup)
13271 {
13272   if (!setup)
13273   {
13274     jamDebug();
13275     setup_scan_pointers_from_tc_con(tcConnectptr);
13276   }
13277   ScanRecord * const scanPtr = scanptr.p;
13278   LQH_DEBUG("transactionState = " << tcConnectptr.p->transactionState);
13279   switch (tcConnectptr.p->transactionState) {
13280   case TcConnectionrec::SCAN_STATE_USED:
13281     LQH_DEBUG("scanState = " << scanPtr->scanState);
13282     switch (scanPtr->scanState) {
13283     case ScanRecord::IN_QUEUE:
13284       jam();
13285       tupScanCloseConfLab(signal, tcConnectptr);
13286       return;
13287     case ScanRecord::WAIT_NEXT_SCAN:
13288       jam();
13289       /* -------------------------------------------------------------------
13290        *  SET COMPLETION STATUS AND WAIT FOR OPPORTUNITY TO STOP THE SCAN.
13291        * ------------------------------------------------------------------- */
13292       scanPtr->scanCompletedStatus = ZTRUE;
13293       return;
13294     case ScanRecord::WAIT_START_QUEUED_SCAN:
13295       jam();
13296       /**
13297        * We are currently starting up a queued scan, need to retain
13298        * scan record until this signal arrives back.
13299        */
13300       tupScanCloseConfLab(signal, tcConnectptr);
13301       return;
13302     case ScanRecord::QUIT_START_QUEUE_SCAN:
13303       jam();
13304       /**
13305        * Scan is already closed, but waiting for a CONTINUEB signal, let
13306        * that be handled and do no more. Response back to TC about
13307        * closed state has already been sent, so no need to send it again.
13308        */
13309       return;
13310     case ScanRecord::WAIT_ACC_SCAN:
13311       jam();
13312       /* -------------------------------------------------------------------
13313        *  WE ARE CURRENTLY STARTING UP THE SCAN. SET COMPLETED STATUS
13314        *  AND WAIT FOR COMPLETION OF STARTUP.
13315        * ------------------------------------------------------------------- */
13316       scanPtr->scanCompletedStatus = ZTRUE;
13317       return;
13318     case ScanRecord::WAIT_CLOSE_SCAN:
13319       jam();
13320       scanPtr->scanCompletedStatus = ZTRUE;
13321       return;
13322       /* -------------------------------------------------------------------
13323        *       CLOSE IS ALREADY ONGOING. WE NEED NOT DO ANYTHING.
13324        * ------------------------------------------------------------------- */
13325     case ScanRecord::WAIT_SCAN_NEXTREQ:
13326       jam();
13327       /* -------------------------------------------------------------------
13328        * WE ARE WAITING FOR A SCAN_NEXTREQ FROM SCAN COORDINATOR(TC)
13329        * WICH HAVE CRASHED. CLOSE THE SCAN
13330        * ------------------------------------------------------------------- */
13331       scanPtr->scanCompletedStatus = ZTRUE;
13332 
13333       if (scanPtr->scanLockHold == ZTRUE) {
13334 	if (scanPtr->m_curr_batch_size_rows > 0) {
13335 	  jam();
13336 	  scanPtr->scanReleaseCounter = 1;
13337 	  scanReleaseLocksLab(signal, tcConnectptr.p);
13338 	  return;
13339 	}//if
13340       }//if
13341       closeScanLab(signal, tcConnectptr.p);
13342       return;
13343     default:
13344       ndbabort();
13345     }//switch
13346     return;
13347   case TcConnectionrec::WAIT_SCAN_AI:
13348     jam();
13349     /* ---------------------------------------------------------------------
13350      *  WE ARE STILL WAITING FOR THE ATTRIBUTE INFORMATION THAT
13351      *  OBVIOUSLY WILL NOT ARRIVE. WE CAN QUIT IMMEDIATELY HERE.
13352      * --------------------------------------------------------------------- */
13353     tupScanCloseConfLab(signal, tcConnectptr);
13354     return;
13355   case TcConnectionrec::SCAN_TUPKEY:
13356     jam();
13357     /* ---------------------------------------------------------------------
13358      *       SET COMPLETION STATUS AND WAIT FOR OPPORTUNITY TO STOP THE SCAN.
13359      * --------------------------------------------------------------------- */
13360     scanPtr->scanCompletedStatus = ZTRUE;
13361     return;
13362   default:
13363     ndbabort();
13364   }//switch
13365 }//Dblqh::closeScanRequestLab()
13366 
13367 bool
seize_acc_ptr_list(ScanRecord * scanP,Uint32 curr_batch_size,Uint32 new_batch_size)13368 Dblqh::seize_acc_ptr_list(ScanRecord* scanP,
13369                           Uint32 curr_batch_size,
13370                           Uint32 new_batch_size)
13371 {
13372   /*  1 maps to 0 segments
13373    * >1 maps to enough segments to store
13374    */
13375   Uint32 segments= (new_batch_size + (SectionSegment::DataLength -2 )) /
13376     SectionSegment::DataLength;
13377 
13378   if (segments <= scanP->scan_acc_segments)
13379   {
13380     // No need to allocate more segments.
13381     return true;
13382   }
13383 
13384   /* Should never get here for reserved scans */
13385   ndbrequire(!scanP->m_reserved);
13386 
13387   if (new_batch_size > 1)
13388   {
13389     for (Uint32 i = 1 + scanP->scan_acc_segments; i <= segments; i++)
13390     {
13391       Uint32 seg= seizeSingleSegment();
13392       if (unlikely(seg == RNIL))
13393       {
13394         jam();
13395         /* Cleanup any allocated segments and return */
13396         scanP->scan_acc_segments= (i-1);
13397         release_acc_ptr_list(scanP);
13398         return false;
13399       }
13400       scanP->scan_acc_op_ptr[i]= seg;
13401     }
13402   }
13403   scanP->scan_acc_segments= segments;
13404   return true;
13405 }
13406 
13407 void
release_acc_ptr_list(ScanRecord * scanP)13408 Dblqh::release_acc_ptr_list(ScanRecord* scanP)
13409 {
13410   Uint32 i, segments;
13411   segments= scanP->scan_acc_segments;
13412 
13413   for (i= 1; i <= segments; i++) {
13414     releaseSection(scanP->scan_acc_op_ptr[i]);
13415   }
13416   scanP->scan_acc_segments= 0;
13417   scanP->scan_acc_index = 0;
13418 }
13419 
13420 Uint32
seizeSingleSegment()13421 Dblqh::seizeSingleSegment()
13422 {
13423   Uint32 junk= 0;
13424   Uint32 iVal= RNIL;
13425 
13426   /* Rather grungy way to grab a segment */
13427   if (!appendToSection(iVal, &junk, 1))
13428     return RNIL;
13429 
13430   return iVal;
13431 }
13432 
13433 void
init_acc_ptr_list(ScanRecord * scanP)13434 Dblqh::init_acc_ptr_list(ScanRecord* scanP)
13435 {
13436   scanP->scan_acc_index = 0;
13437 }
13438 
13439 Uint32
get_acc_ptr_from_scan_record(ScanRecord * scanP,Uint32 index,bool crash_flag)13440 Dblqh::get_acc_ptr_from_scan_record(ScanRecord* scanP,
13441                                     Uint32 index,
13442                                     bool crash_flag)
13443 {
13444   Uint32* acc_ptr;
13445   if (!((index < MAX_PARALLEL_OP_PER_SCAN) &&
13446        index < scanP->scan_acc_index)) {
13447     ndbrequire(crash_flag);
13448     return RNIL;
13449   }
13450   i_get_acc_ptr(scanP, acc_ptr, index);
13451   return *acc_ptr;
13452 }
13453 
13454 void
set_acc_ptr_in_scan_record(ScanRecord * scanP,Uint32 index,Uint32 acc)13455 Dblqh::set_acc_ptr_in_scan_record(ScanRecord* scanP,
13456                                   Uint32 index, Uint32 acc)
13457 {
13458   Uint32 *acc_ptr;
13459   ndbrequire((index == 0 || scanP->scan_acc_index == index) &&
13460              (index < MAX_PARALLEL_OP_PER_SCAN));
13461   scanP->scan_acc_index= index + 1;
13462   i_get_acc_ptr(scanP, acc_ptr, index);
13463   *acc_ptr= acc;
13464 }
13465 
13466 /**
13467  * The design of the SCAN algorithm within one LDM instance
13468  * --------------------------------------------------------
13469  * DBLQH controls the execution of scans on tables on behalf of DBTC and
13470  * DBSPJ. Here follows a signal overview of how a scan is performed within
13471  * one LDM instance. For a description of the global scan protocol
13472  * see DbtcMain.cpp as a comment before execSCAN_TABREQ.
13473  *
13474  * DBLQH only controls execution of a scan towards one partition of a
13475  * table. DBTC/DBSPJ is responsible for execution of scans toward the
13476  * entire table and ensuring that the API sees a consistent view of the
13477  * table.
13478  *
13479  * There are currently four types of scans implemented in one LDM
13480  * instance:
13481  *
13482  * Full table scan using hash index. This is implemented in DBACC.
13483  * Full table scan using row by row. This is implemented in DBTUP.
13484  * Full table scan using row by row in disk order. This is implemented in
13485  *   DBTUP.
13486  * Index scan using one or several ranges. This is implemented in DBTUX.
13487  *
 * DBLQH controls execution of one partition scan. Depending on the scan
 * type, DBACC/DBTUP/DBTUX is responsible for getting the row references to
13490  * the tuple scanned. DBTUP is responsible for reading of those rows and
13491  * finally DBACC is responsible for any locking of rows required as part
13492  * of the scan.
13493  *
13494  * Each scan is controlled by an interpreted program created by the API
13495  * and transported down to DBTUP. This program is sent as part of the
13496  * SCAN_FRAGREQ signal and passed to DBTUP in the STORED_PROCREQ signal.
13497  * This program is applied on each row reference passed to DBTUP by
13498  * execution of the execTUPKEYREQ signal.
13499  *
13500  * In index ranges one or more ranges is sent in the keyinfo part of the
13501  * SCAN_FRAGREQ. This range information is sent to DBTUX one range at a
13502  * time. Actually with multiple ranges, DBLQH will treat each range as a
13503  * separate scan towards the other blocks, so a scan will be started and
13504  * closed towards DBACC/DBTUP/DBTUX for each range involved.
13505  *
13506  * As an optimisation all signals locally in one LDM instance have been
13507  * converted to direct signals.
13508  * The following signals are used as part of the scan of one partition.
13509  * ACC_SCANREQ:
13510  *   This signal initialises an operation record in DBACC/DBTUP/DBTUX for
13511  *   scan of one range or a full partition. Always sent as a direct signal
13512  *   and returned immediately through signal object on return.
13513  *
13514  * STORED_PROCREQ:
13515  *   This signal stores the interpreted program used to read each tuple
 *   as part of the scan. The same signal is also used to deallocate the
 *   interpreted program when the entire scan of all ranges has been
13518  *   completed. Always sent as a direct signal and returned immediately
13519  *   through signal object on return.
13520  *
13521  * ACC_LOCKREQ:
13522  *   Certain scans require a lock on the row before the row is read, this
13523  *   signal acquires such a lock. Always sent as a direct signal. Return
13524  *   signal not always sent immediately.
13525  *
13526  * ACCKEYCONF:
13527  *   Signal returned when the lock have been acquired, the signal is
13528  *   normally sent directly when the row is not locked, but for a locked
13529  *   row the signal can be sent even a second or more later. When sent the
13530  *   signal is sent as a direct signal.
13531  *
13532  * ACCKEYREF:
13533  *   Signal returned when acquiring lock failed, e.g. due to record deleted
13534  *   while waiting for it.
13535  *
13536  * ACC_ABORTCONF:
13537  *   Signal returned after aborting a scan using an asynchronous message to
13538  *   ensure that all asynchronous messages are delivered since setting the
13539  *   scan state as aborted.
13540  *
13541  * NEXT_SCANREQ:
13542  *   This signal is used with different meaning:
13543  *   ZSCAN_NEXT:
13544  *     Get the next row reference to read, returned in NEXT_SCANCONF signal.
13545  *   ZSCAN_NEXT_COMMIT:
13546  *     Get the next row reference to read AND unlock the specified row.
13547  *     Returned in NEXT_SCANCONF signal.
13548  *   ZSCAN_COMMIT:
13549  *     Unlock the specified row. Return signal is simply returned when
13550  *     returning from call to execNEXT_SCANREQ.
13551  *   ZSCAN_CLOSE:
13552  *     Close the scan in DBACC/DBTUP/DBTUX.
13553  *
13554  *   When sent as ZSCAN_COMMIT and ZSCAN_CLOSE it is always sent as a direct
13555  *   signal. Otherwise it is sent as direct or asynchronous signal dependent
13556  *   on the value of the scan_direct_count variable in the DBLQH scan
13557  *   record. The scan_direct_count variable ensures that we keep the number
13558  *   of direct signals sent bounded.
13559  *
13560  * NEXT_SCANCONF:
13561  *   Return signal to NEXT_SCANREQ containing row reference to read or
13562  *   indication of close completed. Always sent as a direct signal.
13563  *
13564  * TUPKEYREQ:
13565  *   This signal does the actual read of the row and sends the read row data
13566  *   directly to the API using the TRANSID_AI signal. This signal is always
13567  *   sent as a direct signal.
13568  *
13569  * ACC_CHECK_SCAN:
13570  *   Continue scanning from specified place. Used by DBACC/DBTUP/DBTUX as an
13571  *   internal signal as part of the scan. This signal can be sent both as
13572  *   an asynchronous signal and as a direct signal.
13573  *
13574  * SCAN_FRAGCONF:
13575  *   Return signal sent to DBTC/DBSPJ after completing a part of the scan,
13576  *   the signal carries a set of references to rows sent to the API. After
 *   sending this signal DBLQH will stop and wait for a SCAN_NEXTREQ
 *   signal asking DBLQH to continue the scan of the partition. The number
13579  *   of rows scanned before sending SCAN_FRAGCONF is dependent on both
13580  *   configuration parameters and information in the SCAN_FRAGREQ signal.
13581  *
13582  *   This signal is also sent when the scan is fully completed.
13583  *   This signal is normally a distributed signal, so it is always sent as
13584  *   an asynchronous signal.
13585  *
13586  * SCAN_NEXTREQ:
13587  *   Request to continue scanning from DBTC/DBSPJ as requested to them from
13588  *   API.
13589  *   This signal is normally a distributed signal, so it is always sent as
13590  *   an asynchronous signal.
13591  *
13592  *  Below follows an example signal diagram of a scan of one partition.
13593  *
13594  *  DBLQH          ACC          TUP         ACC/TUP/TUX    API      DBTC
13595  *    |ACC_SCANREQ
13596  *    |----------------------------------------->|
13597  *    |<-----------------------------------------|
13598  *    | STORED_PROCREQ
13599  *    |------------------------->|
13600  *    |<-------------------------|
13601  *    | NEXT_SCANREQ (ZSCAN_NEXT)
13602  *    |----------------------------------------->|
13603  *    |                          prepare_scanTUPKEYREQ
13604  *    |                          |<--------------|
13605  *    |                          |-------------->|
13606  *    | NEXT_SCANCONF
13607  *    |<-----------------------------------------|
13608  *    | TUPKEYREQ
13609  *    |------------------------->|  TRANSID_AI
13610  *    |                          |-------------------------->|
13611  *    |<-------------------------|
13612  *    | NEXT_SCANREQ (ZSCAN_NEXT_COMMIT)
13613  *    |----------------------------------------->|
13614  *    |                          prepare_scanTUPKEYREQ
13615  *    |                          |<--------------|
13616  *    |                          |-------------->|
13617  *    | NEXT_SCANCONF
13618  *    |<-----------------------------------------|
13619  *    | TUPKEYREQ
13620  *    |------------------------->|  TRANSID_AI
13621  *    |                          |-------------------------->|
13622  *    |<-------------------------|
13623  *    Repeat above for as many rows as required before returning to the
13624  *    API. The above TRANSID_AI isn't necessary, the interpreted program
13625  *    could perform selection and decide to not send a specific row since
13626  *    it doesn't match the condition checked by the interpreted program.
13627  *    |
13628  *    | SCAN_FRAGCONF
13629  *    |---------------------------------------------------------------->|
13630  *    .... Some time for API and DBTC to process things.
13631  *    | SCAN_NEXTREQ
13632  *    |<----------------------------------------------------------------|
13633  *    | NEXT_SCANREQ (ZSCAN_NEXT_COMMIT)
13634  *    |----------------------------------------->|
13635  *    |                          prepare_scanTUPKEYREQ
13636  *    |                          |<--------------|
13637  *    |                          |-------------->|
13638  *    | NEXT_SCANCONF
13639  *    |<-----------------------------------------|
13640  *    | TUPKEYREQ
13641  *    |------------------------->|  TRANSID_AI
13642  *    |                          |-------------------------->|
13643  *    |<-------------------------|
13644  *    Repeat above again until time for next SCAN_FRAGCONF to be sent.
13645  *    When scan from NEXT_SCANCONF indicates there are no more tuples to
13646  *    fetch one starts to close the scan.
13647  *
13648  *    |
13649  *    | NEXT_SCANREQ (ZSCAN_NEXT_COMMIT)
13650  *    |----------------------------------------->|
13651  *    | NEXT_SCANCONF(no more tuples)
13652  *    |<-----------------------------------------|
13653  *    | NEXT_SCANREQ (ZSCAN_CLOSE)
13654  *    |----------------------------------------->|
13655  *    | NEXT_SCANCONF
13656  *    |<-----------------------------------------|
13657  *    | STORED_PROCREQ (delete interpreted program)
13658  *    |------------------------->|
13659  *    |<-------------------------|
13660  *    | SCAN_FRAGCONF (close flag set)
13661  *    |---------------------------------------------------------------->|
13662  *    Now the scan is completed.
13663  *
13664  *    Now a number of variations on the above signal diagrams:
13665  *    Scan with locking:
13666  *    In this we use the flag ZSCAN_NEXT all the time and never
13667  *    ZSCAN_NEXT_COMMIT, we handle things a bit differently instead when
13668  *    receiving SCAN_NEXTREQ where we perform a signal diagram like this:
13669  *
13670  *    | NEXT_SCANREQ (ZSCAN_COMMIT)
13671  *    |----------------------------------------->|
13672  *    |<-----------------------------------------|
13673  *    This is repeated for each row sent to the API in the previous
13674  *    SCAN_FRAGCONF signal.
13675  *
 *    If the application wants the row locked for a longer time, it has had
 *    the chance to perform a key lookup operation that took over the lock
13678  *    such that even when we unlock the scan lock, the transaction still
13679  *    retains a lock on the row.
13680  *
13681  *    After each row scanned we check if we've reached a scan heartbeat
13682  *    timeout. In case we have we send a SCAN_HBREP signal to DBTC/DBSPJ
13683  *    to inform about that we're still actively scanning even though no
13684  *    result rows have been sent. Remember here that a scan in DBLQH can
13685  *    potentially scan billions of rows while only returning very few to
13686  *    the API. Thus we can scan for an extended time without returning to
13687  *    the API. This is handled by the method check_send_scan_hb_rep.
13688  *
13689  *    Already from returning from ACC_SCANREQ we can discover that the
13690  *    partition (== fragment) is empty and go immediately to the close
13691  *    down code.
13692  *    For index scans we will send TUX_BOUND_INFO after ACC_SCANREQ and
13693  *    before sending STORED_PROCREQ to DBTUX. This will provide one range
13694  *    to DBTUX for scanning, if multiple ranges are to be scanned we
13695  *    startup a new scan as if it was a new SCAN_FRAGREQ received, but we
13696  *    don't need to send STORED_PROCREQ since the same interpreted program
13697  *    will be used. We will however send ACC_SCANREQ and TUX_BOUND_INFO
13698  *    also for this new range.
13699  *
 *  There are various reasons for temporarily stopping a scan: this could
 *  be lack of operation records or holding too many row locks; one could
 *  also end up in this situation after waiting for a row lock.
13703  *
13704  *  To restart the scan again after any type of temporary stop one sends
13705  *  the signal ACC_CHECK_SCAN either as direct or as an asynchronous signal
13706  *  to DBACC/DBTUP/DBTUX. This signal is sent from many different places in
13707  *  DBLQH, DBACC, DBTUP and DBTUX. It is always sent as part of NEXT_SCANREQ
13708  *  processing.
13709  *
13710  *  When executing ACC_CHECK_SCAN one can flag to DBACC/DBTUP/DBTUX that one
13711  *  should check for a 1 ms delay with the flag ZCHECK_LCP_STOP. In previous
13712  *  versions this was also related to local checkpoints, this is no longer
13713  *  the case. Now it's only related to situations where it is required to
13714  *  perform an extra wait such that resources becomes available again.
13715  *
 *  DBTUP and DBTUX send the signal CHECK_LCP_STOP to DBLQH in a number of
 *  situations, among other things when a locked key has been encountered.
 *  When the ACCKEYCONF signal then is received indicating that one acquired
 *  the lock, DBLQH will still wait for the delayed CONTINUEB that was started
 *  by CHECK_LCP_STOP in DBLQH to return after a 1 ms delay. This is on the
 *  TODO-list to fix to ensure that we can proceed with these locked rows
 *  immediately after delivery. As it is now we can get up to 1 ms delay each
 *  time we encounter a locked row.
 */
13723  */
13724 /* -------------------------------------------------------------------------
13725  * SCAN_FRAGREQ: Request to start scanning the specified fragment of a table.
13726  * ------------------------------------------------------------------------- */
send_scan_fragref(Signal * signal,Uint32 transid1,Uint32 transid2,Uint32 senderData,Uint32 senderBlockRef,Uint32 errorCode)13727 void Dblqh::send_scan_fragref(Signal* signal,
13728                               Uint32 transid1,
13729                               Uint32 transid2,
13730                               Uint32 senderData,
13731                               Uint32 senderBlockRef,
13732                               Uint32 errorCode)
13733 {
13734   ScanFragRef * ref = (ScanFragRef*)&signal->theData[0];
13735   ref->senderData = senderData;
13736   ref->transId1 = transid1;
13737   ref->transId2 = transid2;
13738   ref->errorCode = errorCode;
13739   sendSignal(senderBlockRef, GSN_SCAN_FRAGREF, signal,
13740 	     ScanFragRef::SignalLength, JBB);
13741 }
13742 
execSCAN_FRAGREQ(Signal * signal)13743 void Dblqh::execSCAN_FRAGREQ(Signal* signal)
13744 {
13745   jamEntry();
13746 
13747   /* Reassemble if the request was fragmented */
13748   if (unlikely(!assembleFragments(signal)))
13749   {
13750     jam();
13751     return;
13752   }
13753 
13754   if (ERROR_INSERTED(5097))
13755   {
13756     ndbabort();
13757   }
13758 
13759   ScanFragReq * const scanFragReq = (ScanFragReq *)&signal->theData[0];
13760   Uint32 errorCode= 0;
13761   Uint32 hashIndex;
13762   TcConnectionrecPtr nextHashptr;
13763   TcConnectionrec * regTcPtr;
13764   Uint32 senderHi = signal->getSendersBlockRef();
13765   // bug#13834481 hashHi!=0 caused timeout (tx not found)
13766 
13767   tabptr.i = scanFragReq->tableId;
13768 
13769   /* Short SCANFRAGREQ has no sections, Long SCANFRAGREQ has 1 or 2
13770    * Section 0 : Mandatory ATTRINFO section
13771    * Section 1 : Optional KEYINFO section
13772    */
13773   const Uint32 numSections= signal->getNoOfSections();
13774   const bool isLongReq= ( numSections != 0 );
13775 
13776   SectionHandle handle(this, signal);
13777 
13778   SegmentedSectionPtr attrInfoPtr, keyInfoPtr;
13779   Uint32 aiLen= 0;
13780   Uint32 keyLen= 0;
13781 
13782   if (likely(isLongReq))
13783   {
13784     /* Long request, get Attr + Key len from section sizes */
13785     handle.getSection(attrInfoPtr, ScanFragReq::AttrInfoSectionNum);
13786     aiLen= attrInfoPtr.sz;
13787 
13788     if (numSections == 2)
13789     {
13790       handle.getSection(keyInfoPtr, ScanFragReq::KeyInfoSectionNum);
13791       keyLen= keyInfoPtr.sz;
13792     }
13793   }
13794   else
13795   {
13796     /* Short request, get Attr + Key len from signal */
13797     aiLen= ScanFragReq::getAttrLen(scanFragReq->requestInfo);
13798     keyLen= (scanFragReq->fragmentNoKeyLen >> 16);
13799     /*
13800      * bug#13834481.  Clear attribute length so that it is not
13801      * re-interpreted as new 7.x bits.  initScanrec() uses signal
13802      * data so we must modify signal data.
13803      */
13804     ScanFragReq::clearAttrLen(scanFragReq->requestInfo);
13805   }
13806   const Uint32 senderBlockRef = signal->senderBlockRef();
13807   const Uint32 transid1 = scanFragReq->transId1;
13808   const Uint32 transid2 = scanFragReq->transId2;
13809   const Uint32 senderData = scanFragReq->senderData;
13810   TcConnectionrecPtr tcConnectptr;
13811   if (likely(ctcNumFree > ZNUM_RESERVED_UTIL_CONNECT_RECORDS &&
13812              !ERROR_INSERTED(5055)) ||
13813       (ScanFragReq::getLcpScanFlag(scanFragReq->requestInfo)) ||
13814       (refToMain(senderBlockRef) == BACKUP) ||
13815       (refToMain(senderBlockRef) == DBUTIL &&
13816        ctcNumFree > ZNUM_RESERVED_TC_CONNECT_RECORDS))
13817   {
13818     /**
13819      * We always keep 3 operation records, one for LCP scans and one for
13820      * Node recovery support (to handle COPY_FRAGREQ when we're aiding a
13821      * node to startup by synchronizing our data with the starting nodes
13822      * recovered data and finally one for backup scans.
13823      *
13824      * We also provide 100 records not available to ordinary transactions
13825      * but available to DBUTIL operations. But LCP and Backup operations
13826      * still have preference over DBUTIL operations.
13827      */
13828     seizeTcrec(tcConnectptr);
13829     jamEntry();
13830   }
13831   else
13832   {
13833     jamEntry();
13834     if (unlikely(ERROR_INSERTED_CLEAR(5055) ||
13835                  (!seize_op_rec(tcConnectptr))))
13836     {
13837       jam();
13838       /* --------------------------------------------------------------------
13839        *      NO FREE TC RECORD AVAILABLE, THUS WE CANNOT HANDLE THE REQUEST.
13840        * -------------------------------------------------------------------- */
13841       tcConnectptr.i = RNIL;
13842       errorCode = ZNO_TC_CONNECT_ERROR;
13843       releaseSections(handle);
13844       send_scan_fragref(signal,
13845                         transid1,
13846                         transid2,
13847                         senderData,
13848                         senderBlockRef,
13849                         ZNO_TC_CONNECT_ERROR);
13850     }
13851   }//if
13852   jamLineDebug(Uint16(tcConnectptr.i));
13853   regTcPtr = tcConnectptr.p;
13854   const Uint32 savePointId = scanFragReq->savePointId;
13855   regTcPtr->clientConnectrec = senderData;
13856   regTcPtr->clientBlockref = senderBlockRef;
13857   regTcPtr->savePointId = savePointId;
13858 
13859   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
13860   if (unlikely(tabptr.p->tableStatus != Tablerec::TABLE_DEFINED &&
13861                tabptr.p->tableStatus != Tablerec::TABLE_READ_ONLY))
13862   {
13863     jam();
13864     errorCode = get_table_state_error(tabptr);
13865     releaseSections(handle);
13866     goto error_handler;
13867   }
13868 
13869   if (unlikely(table_version_major(scanFragReq->schemaVersion) !=
13870                table_version_major(tabptr.p->schemaVersion)))
13871   {
13872     errorCode = ZINVALID_SCHEMA_VERSION;
13873     releaseSections(handle);
13874     goto error_handler;
13875   }
13876 
13877   {
13878     const Uint32 reqinfo = scanFragReq->requestInfo;
13879     const Uint32 fragId = (scanFragReq->fragmentNoKeyLen & 0xFFFF);
13880     const Uint32 max_rows = scanFragReq->batch_size_rows;
13881     const Uint32 scanLockMode = ScanFragReq::getLockMode(reqinfo);
13882     const Uint8 keyinfo = ScanFragReq::getKeyinfoFlag(reqinfo);
13883     const Uint8 rangeScan = ScanFragReq::getRangeScanFlag(reqinfo);
13884     /**
13885      * A write always has to get keyinfo
13886      */
13887     ndbrequire(scanLockMode == 0 || keyinfo);
13888 
13889     ndbrequire(max_rows > 0 && max_rows <= MAX_PARALLEL_OP_PER_SCAN);
13890     if (unlikely(!getFragmentrec(signal, fragId)))
13891     {
13892       jam();
13893       errorCode = 1231;
13894       releaseSections(handle);
13895       goto error_handler;
13896     }//if
13897 
13898     // Verify scan type vs table type (both sides are boolean)
13899     if (unlikely(rangeScan !=
13900                  DictTabInfo::isOrderedIndex(fragptr.p->tableType)))
13901     {
13902       jam();
13903       errorCode = 1232;
13904       releaseSections(handle);
13905       goto error_handler;
13906     }//if
13907 
13908     if (ScanFragReq::getLcpScanFlag(reqinfo) ||
13909         refToMain(senderHi) == BACKUP)
13910     {
13911       /* LCP and Backup scans come here */
13912       jam();
13913       ndbrequire(m_reserved_scans.first(scanptr));
13914       m_reserved_scans.remove(scanptr);
13915     }
13916     else if (unlikely(!c_scanRecordPool.seize(scanptr)))
13917     {
13918       jam();
13919       errorCode = ScanFragRef::ZNO_FREE_SCANREC_ERROR;
13920       releaseSections(handle);
13921       goto error_handler;
13922     }
13923 
13924     initScanTc(scanFragReq,
13925                transid1,
13926                transid2,
13927                fragId,
13928                ZNIL,
13929                senderHi,
13930                tcConnectptr);
13931     regTcPtr->opExec =
13932       (1 ^ ScanFragReq::getNotInterpretedFlag(reqinfo));
13933     {
13934       const Uint32 applRef = scanFragReq->resultRef;
13935 
13936       regTcPtr->save1 = 0;
13937       regTcPtr->primKeyLen = keyLen;
13938       regTcPtr->applRef = applRef;
13939     }
13940 
13941     if (likely(isLongReq))
13942     {
13943       jamDebug();
13944       regTcPtr->attrInfoIVal= attrInfoPtr.i;
13945       if (keyLen)
13946         regTcPtr->keyInfoIVal= keyInfoPtr.i;
13947       /* Scan state machine is now responsible for freeing
13948        * these sections, usually via releaseOprec()
13949        */
13950       handle.clear();
13951 
13952       /*
13953          Update per fragment statistics. 'attrInfoPtr' is not defined for
13954          short-signal scans, so we ignore these, since they only happen
13955          during online upgrades.
13956        */
13957       if (!ScanFragReq::getLcpScanFlag(reqinfo))
13958       {
13959         jamDebug();
13960         Fragrecord::UsageStat& useStat = fragptr.p->m_useStat;
13961         useStat.m_scanFragReqCount++;
13962         useStat.m_scanBoundWords+= keyLen;
13963         if (!ScanFragReq::getNotInterpretedFlag(reqinfo))
13964         {
13965           /* Backup scans do not use interpreted mode. */
13966           useStat.m_scanProgramWords+= getProgramWordCount(attrInfoPtr);
13967         }
13968       }
13969     }
13970 
13971     if (ScanFragReq::getCorrFactorFlag(reqinfo))
13972     {
13973       /**
13974        * Correlation factor for SPJ
13975        */
13976       const Uint32 corrFactorHi = scanFragReq->variableData[1];
13977       regTcPtr->m_corrFactorLo = scanFragReq->variableData[0];
13978       regTcPtr->m_corrFactorHi = corrFactorHi;
13979     }
13980     jamLineDebug((Uint16)aiLen);
13981     errorCode = initScanrec(scanFragReq, aiLen, tcConnectptr);
13982     if (unlikely(errorCode != ZOK))
13983     {
13984       jam();
13985       goto error_handler2;
13986     }//if
13987 
13988     /* Check that no equal element already exists */
13989     ndbrequire(findTransaction(regTcPtr->transid[0],
13990                                regTcPtr->transid[1],
13991                                regTcPtr->tcOprec,
13992                                senderHi,
13993                                tcConnectptr) == ZNOT_FOUND);
13994     hashIndex = (regTcPtr->transid[0] ^ regTcPtr->tcOprec) &
13995                  (TRANSID_HASH_SIZE - 1);
13996     nextHashptr.i = ctransidHash[hashIndex];
13997     ctransidHash[hashIndex] = tcConnectptr.i;
13998     regTcPtr->prevHashRec = RNIL;
13999     regTcPtr->nextHashRec = nextHashptr.i;
14000     regTcPtr->hashIndex = hashIndex;
14001     if (nextHashptr.i != RNIL)
14002     {
14003       /* ---------------------------------------------------------------------
14004        *   ENSURE THAT THE NEXT RECORD HAS SET PREVIOUS TO OUR RECORD
14005        *   IF IT EXISTS
14006        * --------------------------------------------------------------------- */
14007       ndbrequire(tcConnect_pool.getValidPtr(nextHashptr));
14008       ndbassert(nextHashptr.p->prevHashRec == RNIL);
14009       nextHashptr.p->prevHashRec = tcConnectptr.i;
14010     }//if
14011     if (unlikely((! isLongReq ) &&
14012         ( scanptr.p->scanAiLength > 0 )))
14013     {
14014       jam();
14015       regTcPtr->transactionState = TcConnectionrec::WAIT_SCAN_AI;
14016       scanptr.p->scan_lastSeen = __LINE__;
14017       return;
14018     }//if
14019     continueAfterReceivingAllAiLab(signal, tcConnectptr);
14020     return;
14021   }
14022 
14023 error_handler2:
14024   // no scan number allocated
14025   scanptr.p->scan_lastSeen = __LINE__;
14026   if (scanptr.p->m_reserved == 0)
14027   {
14028     jam();
14029     c_scanRecordPool.release(scanptr);
14030     checkPoolShrinkNeed(DBLQH_SCAN_RECORD_TRANSIENT_POOL_INDEX,
14031                         c_scanRecordPool);
14032   }
14033   else
14034   {
14035     jam();
14036     init_release_scanrec(scanptr.p);
14037     m_reserved_scans.addFirst(scanptr);
14038   }
14039 error_handler:
14040   regTcPtr->abortState = TcConnectionrec::ABORT_ACTIVE;
14041   regTcPtr->tcScanRec = RNIL;
14042   releaseOprec(signal, tcConnectptr);
14043   releaseTcrec(signal, tcConnectptr);
14044   send_scan_fragref(signal,
14045                     transid1,
14046                     transid2,
14047                     senderData,
14048                     senderBlockRef,
14049                     errorCode);
14050 }//Dblqh::execSCAN_FRAGREQ()
14051 
continueAfterReceivingAllAiLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)14052 void Dblqh::continueAfterReceivingAllAiLab(
14053                 Signal* signal,
14054                 const TcConnectionrecPtr tcConnectptr)
14055 {
14056   TcConnectionrec * const regTcPtr = tcConnectptr.p;
14057   ScanRecord * const scanPtr = scanptr.p;
14058   regTcPtr->transactionState = TcConnectionrec::SCAN_STATE_USED;
14059 
14060   if (unlikely(scanPtr->scanState == ScanRecord::IN_QUEUE))
14061   {
14062     jam();
14063     scanPtr->scan_lastSeen = __LINE__;
14064     return;
14065   }
14066 
14067   scanPtr->scanState = ScanRecord::WAIT_ACC_SCAN;
14068   AccScanReq * req = (AccScanReq*)&signal->theData[0];
14069 
14070   Uint32 requestInfo = 0;
14071 
14072   AccScanReq::setLockMode(requestInfo, scanPtr->scanLockMode);
14073   AccScanReq::setReadCommittedFlag(requestInfo, scanPtr->readCommitted);
14074   AccScanReq::setDescendingFlag(requestInfo, scanPtr->descending);
14075   AccScanReq::setStatScanFlag(requestInfo, scanPtr->statScan);
14076 
14077   if (refToMain(regTcPtr->clientBlockref) == BACKUP)
14078   {
14079     if (scanPtr->lcpScan)
14080     {
14081       AccScanReq::setNoDiskScanFlag(requestInfo, 1);
14082       AccScanReq::setLcpScanFlag(requestInfo, 1);
14083     }
14084     else
14085     {
14086       /* If backup scan disktables in disk order */
14087 #ifdef BUG25353234_PROPERLY_FIXED
14088       AccScanReq::setNoDiskScanFlag(requestInfo,
14089                                     !regTcPtr->m_disk_table);
14090 #else
14091       AccScanReq::setNoDiskScanFlag(requestInfo, 1);
14092 #endif
14093       AccScanReq::setLcpScanFlag(requestInfo, 0);
14094     }
14095   }
14096   else
14097   {
14098 #ifdef BUG_27776_FIXED
14099     AccScanReq::setNoDiskScanFlag(requestInfo,
14100                                   !regTcPtr->m_disk_table);
14101 #else
14102     AccScanReq::setNoDiskScanFlag(requestInfo, 1);
14103 #endif
14104     AccScanReq::setLcpScanFlag(requestInfo, 0);
14105   }
14106 
14107   SimulatedBlock *block = scanPtr->scanBlock;
14108   req->requestInfo = requestInfo;
14109 
14110   const Uint32 senderData = scanptr.i;
14111   const Uint32 senderRef = cownref;
14112   const Uint32 tableId = regTcPtr->tableref;
14113   const Uint32 fragmentNo = regTcPtr->fragmentid;
14114   const Uint32 transId1 = regTcPtr->transid[0];
14115   const Uint32 transId2 = regTcPtr->transid[1];
14116   const Uint32 savePointId = regTcPtr->savePointId;
14117   ExecFunction f = block->getExecuteFunction(GSN_ACC_SCANREQ);
14118 
14119   req->senderData = senderData;
14120   req->senderRef = senderRef;
14121   req->tableId = tableId;
14122   req->fragmentNo = fragmentNo;
14123   req->transId1 = transId1;
14124   req->transId2 = transId2;
14125   req->savePointId = savePointId;
14126 
14127   block->EXECUTE_DIRECT_FN(f, signal);
14128   if (signal->theData[8] == 0)
14129   {
14130     /* ACC_SCANCONF */
14131     jamEntryDebug();
14132     accScanConfScanLab(signal, tcConnectptr);
14133     return;
14134   }
14135   else
14136   {
14137     /* ACC_SCANREF */
14138     jamEntry();
14139     AccScanRef *ref = (AccScanRef*)&signal->theData[0];
14140     ref->errorCode = signal->theData[8];
14141     execACC_SCANREF(signal, tcConnectptr);
14142     return;
14143   }
14144 }//Dblqh::continueAfterReceivingAllAiLab()
14145 
accScanConfScanLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)14146 void Dblqh::accScanConfScanLab(Signal* signal,
14147                                const TcConnectionrecPtr tcConnectptr)
14148 {
14149   AccScanConf * const accScanConf = (AccScanConf *)&signal->theData[0];
14150   ScanRecord * const scanPtr = scanptr.p;
14151 
14152   /* -----------------------------------------------------------------------
14153    *       PRECONDITION: SCAN_STATE = WAIT_ACC_SCAN
14154    * ----------------------------------------------------------------------- */
14155   if (accScanConf->flag == AccScanConf::ZEMPTY_FRAGMENT)
14156   {
14157     jam();
14158     /* ---------------------------------------------------------------------
14159      *       THE FRAGMENT WAS EMPTY.
14160      *       REPORT SUCCESSFUL COPYING.
14161      * --------------------------------------------------------------------- */
14162     /*
14163      * MRR scan + delete can hit this when the fragment was not
14164      * initially empty, but has become empty after previous range.
14165      */
14166     if (scanPtr->scanStoredProcId != RNIL)
14167     {
14168       jam();
14169       scanPtr->scanCompletedStatus = ZTRUE;
14170       accScanCloseConfLab(signal, tcConnectptr);
14171       return;
14172     }
14173     tupScanCloseConfLab(signal, tcConnectptr);
14174     return;
14175   }//if
14176 
14177   check_send_scan_hb_rep(signal, scanPtr, tcConnectptr.p);
14178 
14179   scanPtr->scanAccPtr = accScanConf->accPtr;
14180   if (scanPtr->rangeScan)
14181   {
14182     TuxBoundInfo* req = (TuxBoundInfo*)signal->getDataPtrSend();
14183     req->errorCode = RNIL;
14184     req->tuxScanPtrI = scanPtr->scanAccPtr;
14185     Uint32 len = req->boundAiLength = copyNextRange(req->data, tcConnectptr.p);
14186     signal->setLength(TuxBoundInfo::SignalLength + len);
14187     c_tux->execTUX_BOUND_INFO(signal);
14188     jamEntryDebug();
14189     if (unlikely(req->errorCode != 0))
14190     {
14191       jam();
14192       /*
14193        * Cannot use STORED_PROCREF to abort since even the REF
14194        * returns a stored proc id.  So record error and continue.
14195        * The scan is already Invalid in TUX and returns empty set.
14196        */
14197       tcConnectptr.p->errorCode = req->errorCode;
14198     }
14199   }
14200 
14201   if (scanPtr->scanStoredProcId == RNIL)
14202   {
14203     TcConnectionrec * const regTcPtr = tcConnectptr.p;
14204     jamDebug();
14205     /* Send AttrInfo to TUP to store as 'stored procedure'
14206      * and get storedProcId back for future reference
14207      */
14208     const Uint32 sig0 = regTcPtr->tupConnectrec;
14209     const Uint32 sig1 = regTcPtr->tableref;
14210     const Uint32 sig2 = scanPtr->scanSchemaVersion;
14211     const Uint32 sig5 = scanPtr->scanApiBlockref;
14212     const Uint32 sig6 = regTcPtr->attrInfoIVal;
14213 
14214     signal->theData[0] = sig0;
14215     signal->theData[1] = sig1;
14216     signal->theData[2] = sig2;
14217     signal->theData[3] = ZSTORED_PROC_SCAN;
14218 // theData[4] is not used
14219     signal->theData[5] = sig5;
14220     signal->theData[6] = sig6;
14221 
14222     /* Pass ATTRINFO as long section, we don't need
14223      * it after this
14224      */
14225     regTcPtr->attrInfoIVal= RNIL;
14226 
14227     c_tup->execSTORED_PROCREQ(signal);
14228     if (signal->theData[0] == 0)
14229     {
14230       /* STORED_PROCCONF */
14231       jamEntryDebug();
14232       Uint32 storedProcId = signal->theData[1];
14233       scanPtr->scanStoredProcId = storedProcId;
14234       c_tup->copyAttrinfo(storedProcId);
14235       storedProcConfScanLab(signal, tcConnectptr);
14236       return;
14237     }
14238     else
14239     {
14240       /* STORED_PROCREF */
14241       jamEntry();
14242       Uint32 storedProcId = signal->theData[2];
14243       scanPtr->scanCompletedStatus = ZTRUE;
14244       scanPtr->scanStoredProcId = storedProcId;
14245       tcConnectptr.p->errorCode = signal->theData[1];
14246       closeScanLab(signal, tcConnectptr.p);
14247       return;
14248     }
14249   }
14250   else
14251   {
14252     /* TUP already has the Stored procedure, continue */
14253     jam();
14254     c_tup->copyAttrinfo(scanPtr->scanStoredProcId);
14255     storedProcConfScanLab(signal, tcConnectptr);
14256     return;
14257   }
14258 }//Dblqh::accScanConfScanLab()
14259 
14260 /* -------------------------------------------------------------------------
14261  *       ENTER STORED_PROCCONF WITH
14262  *         0 success == CONF, 1 failure == REF
14263  *         STORED_PROC_ID
14264  * ------------------------------------------------------------------------- */
storedProcConfScanLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)14265 void Dblqh::storedProcConfScanLab(Signal* signal,
14266                                   const TcConnectionrecPtr tcConnectptr)
14267 {
14268   ScanRecord * const scanPtr = scanptr.p;
14269   if (unlikely(scanPtr->scanCompletedStatus == ZTRUE))
14270   {
14271     jam();
14272     // STOP THE SCAN PROCESS IF THIS HAS BEEN REQUESTED.
14273     closeScanLab(signal, tcConnectptr.p);
14274     return;
14275   }//if
14276   if (scanPtr->check_scan_batch_completed())
14277   {
14278     jam();
14279     scanPtr->m_last_row = 0;
14280     scanPtr->scanState = ScanRecord::WAIT_SCAN_NEXTREQ;
14281     scanPtr->scan_lastSeen = __LINE__;
14282     sendScanFragConf(signal, ZFALSE, tcConnectptr.p);
14283     return;
14284   }
14285   Fragrecord::FragStatus fragstatus = fragptr.p->fragStatus;
14286   if (likely(is_scan_ok(scanPtr, fragstatus)))
14287   {
14288     /**
14289      * At this point we have set up everything in DBLQH, in the
14290      * scan block (DBACC for full table scans, DBTUP for LCP scans
14291      * and node recovery scans and full table scans ordered by an
14292      * NDB API application and DBTUX for ordered index scans
14293      * (range scans).
14294      *
14295      * We will now rely on all operation pointers, scan pointers
14296      * fragment pointers and table pointers to be setup on the
14297      * block object for quick access. If we get a real-time break
14298      * we have to ensure that those are setup properly again.
14299      *
14300      * Real-time breaks have occurred when we receive the signal
14301      * SCAN_NEXTREQ, we could also get a real-time break from
14302      * the scan block by sending NEXT_SCANCONF as an asynchronous
14303      * signal. It can be ordered by the scan block by sending a
14304      * direct signal CHECK_LCP_STOP to DBLQH, this will lead to
14305      * a CONTINUEB with delay of 1 millisecond.
14306      *
14307      * Finally it can happen by sending a CONTINUEB signal from
14308      * send_next_NEXT_SCANREQ to ourselves. This signal simply
14309      * means we are in the middle of processing a local scan
14310      * operation, but we need to take a real-time break to
14311      * provide an opportunity for other operations to execute
14312      * as well.
14313      *
14314      * This is not always the first call to send_next_NEXT_SCANREQ.
14315      * The reason is that we can execute multiple ranges as part
14316      * of one scan. See scanNextLoopLab for more description of
14317      * stack unwinding when sending NEXT_SCANREQ.
14318      */
14319     const Uint32 sig0 = scanPtr->scanAccPtr;
14320     SimulatedBlock *block = scanPtr->scanBlock;
14321     ExecFunction f = scanPtr->scanFunction_NEXT_SCANREQ;
14322     Uint32 in_send_next_scan = m_in_send_next_scan;
14323     signal->theData[1] = RNIL;
14324     signal->theData[2] = NextScanReq::ZSCAN_NEXT;
14325     signal->theData[0] = sig0;
14326     scanPtr->scanState = ScanRecord::WAIT_NEXT_SCAN;
14327     scanPtr->scan_lastSeen = __LINE__;
14328     if (likely(in_send_next_scan == 0))
14329     {
14330       send_next_NEXT_SCANREQ(signal,
14331                              block,
14332                              f,
14333                              scanPtr,
14334                              tcConnectptr.p->clientConnectrec);
14335       return;
14336     }
14337     ndbassert(in_send_next_scan == 1);
14338     m_in_send_next_scan = 2;
14339     return;
14340   }
14341   else
14342   {
14343     jamLine(fragptr.p->fragStatus);
14344     ndbout_c("fragptr.p->fragStatus: %u",
14345              fragptr.p->fragStatus);
14346     // wl4391_todo SR 2-node CRASH_RECOVERING from BACKUP
14347     ndbabort();
14348   }
14349 }//Dblqh::storedProcConfScanLab()
14350 
scanAttrinfoLab(Signal * signal,Uint32 * dataPtr,Uint32 length,const TcConnectionrecPtr tcConnectptr)14351 void Dblqh::scanAttrinfoLab(Signal* signal,
14352                             Uint32* dataPtr,
14353                             Uint32 length,
14354                             const TcConnectionrecPtr tcConnectptr)
14355 {
14356   scanptr.i = tcConnectptr.p->tcScanRec;
14357   ndbrequire(c_scanRecordPool.getValidPtr(scanptr));
14358   if (saveAttrInfoInSection(dataPtr, length, tcConnectptr.p) == ZOK) {
14359     if (tcConnectptr.p->currTupAiLen < scanptr.p->scanAiLength) {
14360       jam();
14361     } else {
14362       jam();
14363       ndbrequire(tcConnectptr.p->currTupAiLen == scanptr.p->scanAiLength);
14364       continueAfterReceivingAllAiLab(signal, tcConnectptr);
14365     }//if
14366     return;
14367   }//if
14368   abort_scan(signal, scanptr.i, ZGET_ATTRINBUF_ERROR, tcConnectptr);
14369 }
14370 
abort_scan(Signal * signal,Uint32 scan_ptr_i,Uint32 errcode,const TcConnectionrecPtr tcConnectptr)14371 void Dblqh::abort_scan(Signal* signal,
14372                        Uint32 scan_ptr_i,
14373                        Uint32 errcode,
14374                        const TcConnectionrecPtr tcConnectptr)
14375 {
14376   jam();
14377   scanptr.i = scan_ptr_i;
14378   ndbrequire(c_scanRecordPool.getValidPtr(scanptr));
14379 
14380   tcConnectptr.p->errorCode = errcode;
14381   tupScanCloseConfLab(signal, tcConnectptr);
14382   return;
14383 }
14384 
14385 /*---------------------------------------------------------------------*/
14386 /* Send this 'I am alive' signal to TC when it is received from ACC    */
14387 /* We include the scanPtr.i that comes from ACC in signalData[1], this */
14388 /* tells TC which fragment record to check for a timeout.              */
14389 /*---------------------------------------------------------------------*/
14390 void
check_send_scan_hb_rep(Signal * signal,ScanRecord * scanPtrP,TcConnectionrec * tcPtrP)14391 Dblqh::check_send_scan_hb_rep(Signal* signal,
14392                               ScanRecord* scanPtrP,
14393                               TcConnectionrec* tcPtrP)
14394 {
14395   switch(scanPtrP->scanType){
14396   case ScanRecord::SCAN:
14397     break;
14398   case ScanRecord::COPY:
14399     return;
14400 #ifdef NDEBUG
14401   case ScanRecord::ST_IDLE:
14402   default:
14403     return;
14404 #else
14405   case ScanRecord::ST_IDLE:
14406     ndbabort();
14407 #endif
14408   }
14409 
14410   const Uint32 now = cLqhTimeOutCount;         // measure in 10ms
14411   const Uint32 last = scanPtrP->scanTcWaiting; // last time we reported to TC (10ms)
14412   const Uint32 timeout = cTransactionDeadlockDetectionTimeout; // (ms)
14413   const Uint32 limit = timeout / 16;
14414   const Uint32 time_waiting = (now - last) * 10; // Convert to ms
14415 
14416   ndbassert(limit > 0);
14417   /**
14418    * We need to ensure we send heartbeats before TC decides to timeout based
14419    * on a deadlock. We use 1/16th of the timeout period for this. The other
14420    * thing to consider is wraparound, but this will work fine since then we
14421    * we will get an immediate very high number and immediate timeout.
14422   */
14423 
14424   if (time_waiting > limit)
14425   {
14426     jam();
14427 
14428     scanPtrP->scanTcWaiting = Uint32(now);
14429     if (tcPtrP->tcTimer != 0)
14430     {
14431       tcPtrP->tcTimer = Uint32(now);
14432     }
14433 
14434     Uint32 save[4];
14435     save[0] = signal->theData[0];
14436     save[1] = signal->theData[1];
14437     save[2] = signal->theData[2];
14438     save[3] = signal->getLength();
14439 
14440     signal->theData[0] = tcPtrP->clientConnectrec;
14441     signal->theData[1] = tcPtrP->transid[0];
14442     signal->theData[2] = tcPtrP->transid[1];
14443     sendSignal(tcPtrP->clientBlockref,
14444                GSN_SCAN_HBREP, signal, 3, JBB);
14445 
14446     signal->theData[0] = save[0];
14447     signal->theData[1] = save[1];
14448     signal->theData[2] = save[2];
14449     signal->setLength(save[3]);
14450   }
14451 }
14452 
14453 Uint32
copyNextRange(Uint32 * dst,TcConnectionrec * tcPtrP)14454 Dblqh::copyNextRange(Uint32 * dst, TcConnectionrec* tcPtrP)
14455 {
14456   /**
14457    * Copy the bound info for the next range from the KeyInfo
14458    * to *dst
14459    * There may be zero or more bounds
14460    * A SectionReader is used to read bound information, its
14461    * position is saved between calls
14462    * This method also extracts range numbers from the
14463    * KeyInfo
14464    */
14465   Uint32 totalLen = tcPtrP->primKeyLen;
14466   if (totalLen == 0)
14467   {
14468     return 0;
14469   }
14470 
14471   Uint32 * save = dst;
14472   do
14473   {
14474     ndbassert( tcPtrP->keyInfoIVal != RNIL );
14475     SectionReader keyInfoReader(tcPtrP->keyInfoIVal,
14476                                 g_sectionSegmentPool);
14477 
14478     if (tcPtrP->m_flags & TcConnectionrec::OP_SCANKEYINFOPOSSAVED)
14479     {
14480       /* Second or higher range in an MRR scan
14481        * Restore SectionReader to the last position it was in
14482        */
14483       bool ok= keyInfoReader.setPos(tcPtrP->scanKeyInfoPos);
14484       ndbrequire(ok);
14485     }
14486 
14487     /* Get first word of next range and extract range
14488      * length, number from it.
14489      * For non MRR, these will be zero.
14490      */
14491     Uint32 firstWord;
14492     ndbrequire( keyInfoReader.getWord(&firstWord) );
14493     const Uint32 rangeLen= (firstWord >> 16) ? (firstWord >> 16) : totalLen;
14494     Uint32 range_no = (firstWord & 0xFFF0) >> 4;
14495     tcPtrP->m_scan_curr_range_no= range_no;
14496     tcPtrP->m_corrFactorLo &= 0x0000FFFF;
14497     tcPtrP->m_corrFactorLo |= (range_no << 16);
14498     firstWord &= 0xF; // Remove length+range num from first word
14499 
14500     /* Write range info to dst */
14501     *(dst++)= firstWord;
14502     bool ok= keyInfoReader.getWords(dst, rangeLen - 1);
14503     ndbassert(ok);
14504     if (unlikely(!ok))
14505       break;
14506 
14507     if (ERROR_INSERTED(5074))
14508       break;
14509 
14510     tcPtrP->primKeyLen-= rangeLen;
14511 
14512     if (rangeLen == totalLen)
14513     {
14514       /* All range information has been copied, free the section */
14515       releaseSection(tcPtrP->keyInfoIVal);
14516       tcPtrP->keyInfoIVal= RNIL;
14517     }
14518     else
14519     {
14520       /* Save position of SectionReader for next range (if any) */
14521       tcPtrP->scanKeyInfoPos= keyInfoReader.getPos();
14522       tcPtrP->m_flags|= TcConnectionrec::OP_SCANKEYINFOPOSSAVED;
14523     }
14524 
14525     return rangeLen;
14526   } while (0);
14527 
14528   /**
14529    * We enter here if there was some error in the keyinfo
14530    *   this has (once) been seen in customer lab,
   *   never in the wild, and never in internal lab.
14532    *   root-cause unknown, maybe ndbapi application bug
14533    *
14534    * Crash in debug, or ERROR_INSERT (unless 5074)
14535    * else
14536    *   generate an incorrect bound...that will make TUX abort the scan
14537    */
14538 #ifdef ERROR_INSERT
14539   ndbrequire(ERROR_INSERTED_CLEAR(5074));
14540 #else
14541   ndbassert(false);
14542 #endif
14543 
14544   * save = TuxBoundInfo::InvalidBound;
14545   return 1;
14546 }
14547 
14548 /* -------------------------------------------------------------------------
14549  * When executing a scan we must come up to the surface at times to wait
14550  * for a resource to become available
14551  * ------------------------------------------------------------------------- */
/**
 * Handle CHECK_LCP_STOP for the scan identified by CheckLcpStop::scanPtrI.
 *
 * The request is read from signal->theData and the answer to the caller is
 * written back into signal->theData[0]:
 *  - ZSCAN_RUNNABLE: nothing is done, the signal is left untouched.
 *  - ZSCAN_RESOURCE_WAIT / ZSCAN_RESOURCE_WAIT_STOPPABLE: either
 *    CheckLcpStop::ZABORT_SCAN (scan is marked completed) or
 *    CheckLcpStop::ZTAKE_A_BREAK after queueing a delayed CONTINUEB.
 *  - ZSCAN_RUNNABLE_YIELD: CheckLcpStop::ZTAKE_A_BREAK after queueing an
 *    immediate CONTINUEB.
 * Any other scanState aborts the node (ndbabort).
 *
 * @param signal  In: CheckLcpStop request. Out: theData[0] holds the reply.
 */
void Dblqh::execCHECK_LCP_STOP(Signal* signal)
{
  /**
   * cls points into signal->theData, which is overwritten further down.
   * scanPtrI is therefore copied out up front, and cls->scanState is only
   * read before any of those writes.
   */
  const CheckLcpStop* cls = (const CheckLcpStop*) signal->theData;
  const Uint32 scanPtrI = cls->scanPtrI;
  jamEntry();
  switch(cls->scanState)
  {
  case CheckLcpStop::ZSCAN_RUNNABLE:
  {
    jam();
    return;
  }
  case CheckLcpStop::ZSCAN_RESOURCE_WAIT:
  case CheckLcpStop::ZSCAN_RESOURCE_WAIT_STOPPABLE:
  {
    jam();
    /**
     * Scan waiting for a resource, we will use a
     * delayed CONTINUEB to continue it later
     *
     * Not appropriate to send SCAN_HBREP here since we are not making
     * progress and thus sending SCAN_HBREP would nullify TC from being
     * able to discover deadlocks.
     */

    /* Tracking */
    ScanRecordPtr loc_scanptr;
    loc_scanptr.i = scanPtrI;
    /**
     * The record is fetched unchecked, updated, and only then validated
     * with Magic::check_ptr.
     * NOTE(review): this ordering looks deliberate; confirm before changing.
     */
    ndbrequire(c_scanRecordPool.getUncheckedPtrRW(loc_scanptr));
    loc_scanptr.p->scan_lastSeen = __LINE__;
    loc_scanptr.p->scan_check_lcp_stop++;
    ndbrequire(Magic::check_ptr(loc_scanptr.p));
    /* Give up after 5 consecutive stoppable waits without progress */
    if (loc_scanptr.p->scan_check_lcp_stop >= 5 &&
        cls->scanState == CheckLcpStop::ZSCAN_RESOURCE_WAIT_STOPPABLE)
    {
      jam();
      /**
       * STOPPABLE resource wait is e.g. Out of operation records, here
       * we only wait for a short time to slow things down a bit, but
       * we don't wait indefinitely like we do for e.g. locked rows.
       * Lock timeouts are handled by DBTC, but we can conclude on our
       * own to abort if we are out of operation records.
       *
       * Report error as out of operation records, will be reported as
       * out of operation records in local data manager to application
       * with the advice to raise TransactionMemory.
       */
      loc_scanptr.p->scanCompletedStatus = ZTRUE;
      Ptr<TcConnectionrec> regTcPtr;
      regTcPtr.i = loc_scanptr.p->scanTcrec;
      ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
      regTcPtr.p->errorCode = ZNO_TC_CONNECT_ERROR;
    }
    /**
     * scanCompletedStatus may have been set just above, or may already
     * have been ZTRUE on entry; either way the scan is to be aborted.
     */
    if (loc_scanptr.p->scanCompletedStatus == ZTRUE)
    {
      jam();
      /* Tell caller to abort scan */
      signal->theData[0] = CheckLcpStop::ZABORT_SCAN;
      return;
    }

    /**
     * Queue a CONTINUEB(ZCHECK_LCP_STOP_BLOCKED, scanPtrI) to ourselves.
     * Presumably the trailing arguments are (delay = 1 ms, length = 2 words)
     * -- TODO confirm against sendSignalWithDelay's declaration.
     */
    signal->theData[0] = ZCHECK_LCP_STOP_BLOCKED;
    signal->theData[1] = scanPtrI;
    sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 1, 2);

    /* Tell caller to take a break */
    signal->theData[0] = CheckLcpStop::ZTAKE_A_BREAK;
    return;
  }
  case CheckLcpStop::ZSCAN_RUNNABLE_YIELD:
  {
    jam();
    /**
     * Scan voluntarily yielding cpu
     * We will use an immediate CONTINUEB to continue
     * it ASAP.
     */
    {
      /* Consider sending SCAN_HBREP if appropriate */
      ScanRecordPtr scan;
      TcConnectionrecPtr tc;
      scan.i = scanPtrI;
      ndbrequire(c_scanRecordPool.getValidPtr(scan));
      tc.i = scan.p->scanTcrec;
      ndbrequire(tcConnect_pool.getValidPtr(tc));
      check_send_scan_hb_rep(signal, scan.p, tc.p);

      /* Tracking */
      scan.p->scan_lastSeen = __LINE__;
      scan.p->scan_check_lcp_stop++;
    }

    /* Same CONTINUEB payload as the resource-wait case, but sent undelayed */
    signal->theData[0] = ZCHECK_LCP_STOP_BLOCKED;
    signal->theData[1] = scanPtrI;
    sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);

    /* Tell caller to take a break */
    signal->theData[0] = CheckLcpStop::ZTAKE_A_BREAK;
    return;
  }
  default:
    /* Unknown scanState: record it in the jam trace, then crash */
    jam();
    jamLine(cls->scanState);
    ndbabort();
  }
}//Dblqh::execCHECK_LCP_STOP()
14658 
14659 /* -------------------------------------------------------------------------
14660  *       ENTER NEXT_SCANCONF
14661  * -------------------------------------------------------------------------
14662  *       PRECONDITION: SCAN_STATE = WAIT_NEXT_SCAN
14663  * ------------------------------------------------------------------------- */
/**
 * Continue a scan after NEXT_SCANCONF while the scan is waiting for the
 * next row.
 *
 * Two main cases, selected by fragId and accOpPtr:
 *  - Both differ from RNIL: a row was returned. The row is either recorded
 *    as a deleted row id (short signal, LCP scans), or the scan is closed
 *    because that was requested, or the row is read through
 *    next_scanconf_tupkeyreq / next_scanconf_load_diskpage.
 *  - Otherwise: no row was returned. Depending on lock mode, rows already
 *    collected in the current batch and pending key ranges, the scan is
 *    closed, the batch is reported with SCAN_FRAGCONF, or the scanning
 *    block is asked to continue via ACC_CHECK_SCAN.
 *
 * Besides its arguments the function uses the block-level fragptr and
 * prim_tab_fragptr, which must already refer to the scanned fragment.
 *
 * @param signal        The received signal; also reused for outgoing signals.
 * @param scanPtr       Scan record of the scan being continued.
 * @param fragId        Fragment id from the CONF, RNIL is handled as
 *                      "nothing more to scan" in the code below.
 * @param accOpPtr      Operation pointer for the returned row, RNIL when no
 *                      row was returned.
 * @param tcConnectptr  Operation record that owns the scan.
 */
void Dblqh::nextScanConfScanLab(Signal* signal,
                                ScanRecord * const scanPtr,
                                Uint32 fragId,
                                Uint32 accOpPtr,
                                const TcConnectionrecPtr tcConnectptr)
{
  TcConnectionrec * const regTcPtr = tcConnectptr.p;
  if (likely(fragId != RNIL && accOpPtr != RNIL))
  {
    jamDebug();
    check_send_scan_hb_rep(signal, scanPtr, tcConnectptr.p);
    /* A row was returned, so the scan is making progress: reset counter */
    scanPtr->scan_check_lcp_stop = 0;
    /* Store accOpPtr at the index of the next row in the current batch */
    set_acc_ptr_in_scan_record(scanPtr,
                               scanPtr->m_curr_batch_size_rows,
                               accOpPtr);

    if (unlikely(signal->getLength() ==
                 NextScanConf::SignalLengthNoKeyInfo))
    {
      /**
       * We have found a deleted row id as part of a LCP scan.
       * We don't use TRANSID_AI in this case to avoid having to go through
       * TUP in this case. We will however call scanTupkeyConfLab to fake
       * that we return successfully from TUPKEYREQ. This is to simplify
       * the code and use the normal patterns. This means that the record
       * will be part of scan batch size which is necessary to ensure that
       * we don't risk running out of buffer space in the BACKUP block while
       * recording deleted row ids.
       *
       * We return with accOpPtr set to RNIL in this case to avoid
       * complications when releasing locks.
       *
       * NOTE(review): the code below does the batch accounting inline and
       * does not call scanTupkeyConfLab; the sentence above may be stale.
       */
      NextScanConf * const nextScanConf = (NextScanConf *)&signal->theData[0];
      Uint32 gci = nextScanConf->gci;
      Uint32 readLength;
      /**
       * Coming here only happens for LCP scans and these always returns
       * row ids, both in TRANSID_AIs and in return NEXT_SCANCONF.
       */
      if (scanPtr->m_row_id.m_page_idx == ZNIL)
      {
        jam();
        /* gci transports record_size in this case */
        c_backup->record_deleted_pageid(scanPtr->m_row_id.m_page_no, gci);
        readLength = 2;
      }
      else
      {
        jam();
        c_backup->record_deleted_rowid(scanPtr->m_row_id.m_page_no,
                                       scanPtr->m_row_id.m_page_idx,
                                       gci);
        readLength = 3;
      }
      /* Account for the recorded entry as one row of readLength words */
      ndbrequire(scanPtr->m_curr_batch_size_rows < MAX_PARALLEL_OP_PER_SCAN);
      scanPtr->m_exec_direct_batch_size_words += readLength;
      scanPtr->m_curr_batch_size_bytes+= readLength * sizeof(Uint32);
      scanPtr->m_curr_batch_size_rows++;
      scanPtr->m_last_row = false;
      scanPtr->scanFlag = NextScanReq::ZSCAN_NEXT;

      if (!scanPtr->check_scan_batch_completed())
      {
        jam();
        /* Room left in the batch: fetch the next row right away */
        scanNextLoopLab(signal,
                        regTcPtr->clientConnectrec,
                        RNIL,
                        scanPtr,
                        fragptr.p);
        return;
      }
      else
      {
        jam();
        /* Batch is full: report it and wait for the next request */
        scanPtr->scanState = ScanRecord::WAIT_SCAN_NEXTREQ;
        scanPtr->scan_lastSeen = __LINE__;
        sendScanFragConf(signal, ZFALSE, regTcPtr);
        return;
      }
    }

    /* ----------------------------------------------------------------------
     *       STOP THE SCAN PROCESS IF THIS HAS BEEN REQUESTED.
     * ---------------------------------------------------------------------- */
    if (unlikely(scanPtr->scanCompletedStatus == ZTRUE))
    {
      if ((scanPtr->scanLockHold == ZTRUE) &&
          (scanPtr->m_curr_batch_size_rows > 0))
      {
        jam();
        /* Rows in the batch hold locks: release them before closing */
        scanPtr->scanReleaseCounter = 1;
        scanReleaseLocksLab(signal, regTcPtr);
        return;
      }//if
      jam();
      closeScanLab(signal, regTcPtr);
      return;
    }//if

    /**
     * Normal case: read the row. Tables with disk data go through
     * next_scanconf_load_diskpage first, others call
     * next_scanconf_tupkeyreq directly with disk_page == RNIL.
     */
    Fragrecord* fragPtrP = prim_tab_fragptr.p;
    bool disk_table = regTcPtr->m_disk_table;
    regTcPtr->transactionState = TcConnectionrec::SCAN_TUPKEY;
    if (likely(!disk_table))
    {
      jamDebug();
      next_scanconf_tupkeyreq(signal, scanPtr, regTcPtr, fragPtrP, RNIL);
      return;
    }
    else
    {
      jamDebug();
      scanPtr->scan_lastSeen = __LINE__;
      next_scanconf_load_diskpage(signal, scanPtr, tcConnectptr,fragPtrP);
      return;
    }
  }
  else
  {
    // If accOperationPtr == RNIL no record was returned by ACC
    /* ---------------------------------------------------------------------
     *       THERE ARE NO MORE TUPLES TO FETCH. IF WE HAVE ANY
     *       OPERATIONS STILL NEEDING A LOCK WE REPORT TO THE
     *       APPLICATION AND CLOSE THE SCAN WHEN THE NEXT SCAN
     *       REQUEST IS RECEIVED. IF WE DO NOT HAVE ANY NEED FOR
     *       LOCKS WE CAN CLOSE THE SCAN IMMEDIATELY.
     * --------------------------------------------------------------------- */
    /*************************************************************
     *       STOP THE SCAN PROCESS IF THIS HAS BEEN REQUESTED.
     ************************************************************ */

    /* No fragment and no locks held by the scan: close immediately */
    if (fragId == RNIL && !scanPtr->scanLockHold)
    {
      jamDebug();
      closeScanLab(signal, tcConnectptr.p);
      return;
    }

    if (scanPtr->scanCompletedStatus == ZTRUE) {
      if ((scanPtr->scanLockHold == ZTRUE) &&
	  (scanPtr->m_curr_batch_size_rows > 0)) {
	jam();
	scanPtr->scanReleaseCounter = 1;
        scanPtr->scan_check_lcp_stop = 0;
	scanReleaseLocksLab(signal, tcConnectptr.p);
	return;
      }//if
      jam();
      closeScanLab(signal, tcConnectptr.p);
      return;
    }//if

    /**
     * Rows are pending in the current batch: report them first.
     * The scan is marked completed only when the fragment is exhausted
     * (fragId == RNIL) and no further key ranges remain (primKeyLen == 0).
     */
    if (scanPtr->m_curr_batch_size_rows > 0) {
      if (fragId == RNIL && regTcPtr->primKeyLen == 0)
      {
        jam();
	scanPtr->scanCompletedStatus = ZTRUE;
      }
      jam();
      scanPtr->scan_check_lcp_stop = 0;
      scanPtr->scanState = ScanRecord::WAIT_SCAN_NEXTREQ;
      scanPtr->scan_lastSeen = __LINE__;
      sendScanFragConf(signal, ZFALSE, tcConnectptr.p);
      return;
    }//if

    if (fragId == RNIL)
    {
      jam();
      closeScanLab(signal, tcConnectptr.p);
      return;
    }
    else
    {
      /**
       * Nothing to report and the fragment is not exhausted: ask the
       * scanning block (scanBlockref) to run its ZCHECK_LCP_STOP handling
       * through a direct ACC_CHECK_SCAN call.
       */
      scanPtr->scan_lastSeen = __LINE__;
      Uint32 sig0 = scanPtr->scanAccPtr;
      BlockReference blockRef = scanPtr->scanBlockref;
      jam();
      signal->theData[0] = sig0;
      signal->theData[1] = AccCheckScan::ZCHECK_LCP_STOP;
      EXECUTE_DIRECT(refToMain(blockRef),
                     GSN_ACC_CHECK_SCAN,
                     signal,
                     2);
      return;
    }
  }
}//Dblqh::nextScanConfScanLab()
14851 
14852 void
next_scanconf_tupkeyreq(Signal * signal,ScanRecord * scanPtr,TcConnectionrec * regTcPtr,Fragrecord * fragPtrP,Uint32 disk_page)14853 Dblqh::next_scanconf_tupkeyreq(Signal* signal,
14854 			       ScanRecord * scanPtr,
14855 			       TcConnectionrec * regTcPtr,
14856 			       Fragrecord * fragPtrP,
14857 			       Uint32 disk_page)
14858 {
14859   TupKeyReq * const tupKeyReq = (TupKeyReq *)signal->getDataPtrSend();
14860   Uint32 reqinfo = 0;
14861   TupKeyReq::setDirtyFlag(reqinfo, (scanPtr->scanLockHold == ZFALSE));
14862   TupKeyReq::setPrioAFlag(reqinfo, scanPtr->prioAFlag);
14863   TupKeyReq::setOperation(reqinfo, regTcPtr->operation);
14864   TupKeyReq::setInterpretedFlag(reqinfo, regTcPtr->opExec);
14865   TupKeyReq::setReorgFlag(reqinfo, regTcPtr->m_reorg);
14866   TupKeyReq::setNrCopyFlag(reqinfo, ZFALSE);
14867   tupKeyReq->disk_page= disk_page;
14868   jamDebug();
14869 
14870   tupKeyReq->request = reqinfo;
14871   /* No AttrInfo sent to TUP, it uses a stored procedure */
14872   {
14873     /**
14874      * The row id here depends on if we are scanning in TUX
14875      * or in TUP or ACC. TUX returns phyiscal row ids and
14876      * TUP and ACC returns logical row ids. This is handled
14877      * by TUP.
14878      */
14879     const Uint32 keyRef1 = scanPtr->m_row_id.m_page_no;
14880     const Uint32 keyRef2 = scanPtr->m_row_id.m_page_idx;
14881     const Uint32 opRef = scanPtr->scanApiOpPtr;
14882     const Uint32 applRef = scanPtr->scanApiBlockref;
14883     const Uint32 aiLen = scanPtr->scanAiLength;
14884     tupKeyReq->keyRef1 = keyRef1;
14885     tupKeyReq->keyRef2 = keyRef2;
14886     tupKeyReq->opRef = opRef;
14887     tupKeyReq->applRef = applRef;
14888     tupKeyReq->attrBufLen = aiLen;
14889   }
14890   {
14891     const Uint32 coordinatorTC = regTcPtr->tcBlockref;
14892     const Uint32 tcOpIndex = regTcPtr->tcOprec;
14893     const Uint32 savePointId = regTcPtr->savePointId;
14894     tupKeyReq->coordinatorTC = coordinatorTC;
14895     tupKeyReq->tcOpIndex = tcOpIndex;
14896     tupKeyReq->savePointId = savePointId;
14897   }
14898   {
14899     const Uint32 seqNoReplica = regTcPtr->seqNoReplica;
14900     const Uint32 transId1 = regTcPtr->transid[0];
14901     const Uint32 transId2 = regTcPtr->transid[1];
14902 
14903     tupKeyReq->deferred_constraints = 0;
14904     tupKeyReq->disable_fk_checks = 0;
14905     const Uint32 flags = regTcPtr->m_flags;
14906     tupKeyReq->transId1 = transId1;
14907     tupKeyReq->transId2 = transId2;
14908     tupKeyReq->triggers =
14909       (flags & TcConnectionrec::OP_NO_TRIGGERS) ?
14910       TupKeyReq::OP_NO_TRIGGERS :
14911       (seqNoReplica == 0) ?
14912       TupKeyReq::OP_PRIMARY_REPLICA : TupKeyReq::OP_BACKUP_REPLICA;
14913 #ifdef VM_TRACE
14914     tupKeyReq->fragPtr = fragPtrP->tupFragptr;
14915 #endif
14916   }
14917   if (c_tup->execTUPKEYREQ(signal))
14918   {
14919     execTUPKEYCONF(signal);
14920     return;
14921   }
14922   else
14923   {
14924     execTUPKEYREF(signal);
14925     return;
14926   }
14927 }
14928 
14929 void
next_scanconf_load_diskpage(Signal * signal,ScanRecord * const scanPtr,Ptr<TcConnectionrec> regTcPtr,Fragrecord * fragPtrP)14930 Dblqh::next_scanconf_load_diskpage(Signal* signal,
14931 				   ScanRecord * const scanPtr,
14932 				   Ptr<TcConnectionrec> regTcPtr,
14933 				   Fragrecord* fragPtrP)
14934 {
14935   jam();
14936 
14937   int res;
14938 
14939   Uint32 disk_flag = (scanPtr->m_reserved) ? Page_cache_client::COPY_FRAG : 0;
14940   if ((res = c_tup->load_diskpage_scan(signal,
14941                                        regTcPtr.p->tupConnectrec,
14942                                        fragPtrP->tupFragptr,
14943                                        scanPtr->m_row_id.m_page_no,
14944                                        scanPtr->m_row_id.m_page_idx,
14945                                        scanPtr->rangeScan,
14946                                        disk_flag)) > 0)
14947   {
14948     next_scanconf_tupkeyreq(signal, scanPtr, regTcPtr.p, fragPtrP, res);
14949     return;
14950   }
14951   else if(unlikely(res != 0))
14952   {
14953     jam();
14954     TupKeyRef * ref = (TupKeyRef *)signal->getDataPtr();
14955     ref->userRef= regTcPtr.i;
14956     if (res == -1)
14957     {
14958       jam();
14959       ref->errorCode= ~0;
14960     }
14961     else
14962     {
14963       jam();
14964       ref->errorCode= -res;
14965     }
14966     execTUPKEYREF(signal);
14967     return;
14968   }
14969 }
14970 
14971 void
next_scanconf_load_diskpage_callback(Signal * signal,Uint32 callbackData,Uint32 disk_page)14972 Dblqh::next_scanconf_load_diskpage_callback(Signal* signal,
14973 					    Uint32 callbackData,
14974 					    Uint32 disk_page)
14975 {
14976   jamEntry();
14977   Ptr<TcConnectionrec> regTcPtr;
14978   regTcPtr.i= callbackData;
14979   ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
14980   /**
14981    * We have returned from a real-time break, we need to set up
14982    * the proper block pointers for scan execution.
14983    */
14984   setup_scan_pointers_from_tc_con(regTcPtr);
14985 
14986   ScanRecord *scanPtr = scanptr.p;
14987   Fragrecord *fragPtrP = prim_tab_fragptr.p;
14988   if (disk_page > 0)
14989   {
14990     if (scanPtr->rangeScan)
14991     {
14992       jam();
14993       c_tup->prepare_scan_tux_TUPKEYREQ(scanPtr->m_row_id.m_page_no,
14994                                         scanPtr->m_row_id.m_page_idx);
14995     }
14996     else
14997     {
14998       jam();
14999       c_tup->prepare_scanTUPKEYREQ(scanPtr->m_row_id.m_page_no,
15000                                    scanPtr->m_row_id.m_page_idx);
15001     }
15002     next_scanconf_tupkeyreq(signal, scanPtr, regTcPtr.p, fragPtrP, disk_page);
15003     return;
15004   }
15005   else
15006   {
15007     jam();
15008     TupKeyRef * ref = (TupKeyRef *)signal->getDataPtr();
15009     ref->userRef= callbackData;
15010     ref->errorCode= disk_page;
15011     execTUPKEYREF(signal);
15012     return;
15013   }
15014 }
15015 
15016 /* -------------------------------------------------------------------------
15017  *       STORE KEYINFO IN A LONG SECTION PRIOR TO SENDING
15018  * -------------------------------------------------------------------------
15019  *       PRECONDITION:   SCAN_STATE = WAIT_SCAN_KEYINFO
15020  * ------------------------------------------------------------------------- */
15021 bool
keyinfoLab(const Uint32 * src,Uint32 len,const TcConnectionrecPtr tcConnectptr)15022 Dblqh::keyinfoLab(const Uint32 * src,
15023                   Uint32 len,
15024                   const TcConnectionrecPtr tcConnectptr)
15025 {
15026   ndbassert( tcConnectptr.p->keyInfoIVal == RNIL );
15027   ndbassert( len > 0 );
15028 
15029   if (ERROR_INSERTED(5052) || ERROR_INSERTED_CLEAR(5060))
15030     return false;
15031 
15032   return(appendToSection(tcConnectptr.p->keyInfoIVal,
15033                          src,
15034                          len));
15035 }//Dblqh::keyinfoLab()
15036 
15037 Uint32
readPrimaryKeys(ScanRecord * scanP,TcConnectionrec * tcConP,Uint32 * dst)15038 Dblqh::readPrimaryKeys(ScanRecord *scanP, TcConnectionrec *tcConP, Uint32 *dst)
15039 {
15040   Uint32 tableId = prim_tab_fragptr.p->tabRef;
15041   Uint32 fragId = tcConP->fragmentid;
15042   Uint32 fragPageId = scanP->m_row_id.m_page_no;
15043   Uint32 pageIndex = scanP->m_row_id.m_page_idx;
15044 
15045   if (likely(scanP->rangeScan))
15046   {
15047     jamDebug();
15048     fragPageId = c_tup->get_current_frag_page_id();
15049   }
15050   int ret = c_tup->accReadPk(tableId, fragId, fragPageId, pageIndex, dst, false);
15051   jamEntry();
15052   if(0)
15053     ndbout_c("readPrimaryKeys(table: %d fragment: %d [ %d %d ] -> %d",
15054 	     tableId, fragId, fragPageId, pageIndex, ret);
15055   ndbassert(ret > 0);
15056 
15057   return ret;
15058 }
15059 
15060 /* -------------------------------------------------------------------------
15061  *         ENTER TUPKEYCONF
15062  * -------------------------------------------------------------------------
15063  *       PRECONDITION:   TRANSACTION_STATE = SCAN_TUPKEY
15064  * ------------------------------------------------------------------------- */
/**
 * Handle a successful TUPKEYREQ (TUPKEYCONF) for a row read by a scan.
 *
 * Updates scan statistics, adds the row to the current batch and then
 * either reports the batch, releases locks, closes the scan or continues
 * with the next row. Works on the block-level scanptr and fragptr in
 * addition to its arguments.
 *
 * @param signal    Signal holding the TupKeyConf data; reused for the
 *                  follow-up calls.
 * @param regTcPtr  Operation record owning the scan.
 */
void Dblqh::scanTupkeyConfLab(Signal* signal,
                              TcConnectionrec* regTcPtr)
{
  ScanRecord * const scanPtr = scanptr.p;
  Uint32 scan_direct_count = m_scan_direct_count;
  /**
   * readLength and lastRow are copied out of the signal here, before the
   * direct call into ACC further down, which is handed the same signal.
   */
  const TupKeyConf * conf = (TupKeyConf *)signal->getDataPtr();
  Uint32 read_len = conf->readLength;
  Uint32 last_row = conf->lastRow | scanPtr->m_first_match_flag;
  m_scan_direct_count = scan_direct_count + 1;

  /* Usage statistics are not maintained for LCP scans */
  if (!scanPtr->lcpScan)
  {
    Fragrecord::UsageStat& useStat = fragptr.p->m_useStat;
    ndbassert(useStat.m_scanFragReqCount > 0);

    useStat.m_scanRowsExamined++;
    useStat.m_scanInstructionCount += conf->noExecInstructions;
  }

  regTcPtr->transactionState = TcConnectionrec::SCAN_STATE_USED;


  /**
   * (Uint32)-1 means there is no ACC operation for this row; the assert
   * below states that this must not happen when ACC is the scanning block.
   */
  const Uint32 rows = scanPtr->m_curr_batch_size_rows;
  const Uint32 accOpPtr= get_acc_ptr_from_scan_record(scanPtr, rows, false);
  if (accOpPtr != (Uint32)-1)
  {
    c_acc->execACCKEY_ORD_no_ptr(signal, accOpPtr);
    jamEntry();
  }
  else
  {
    ndbassert(refToBlock(scanPtr->scanBlockref) != DBACC);
  }

  if (unlikely(scanPtr->scanCompletedStatus == ZTRUE))
  {
    /* ---------------------------------------------------------------------
     *       STOP THE SCAN PROCESS IF THIS HAS BEEN REQUESTED.
     * --------------------------------------------------------------------- */
    if ((scanPtr->scanLockHold == ZTRUE) && rows)
    {
      jam();
      scanPtr->scanReleaseCounter = 1;
      scanReleaseLocksLab(signal, regTcPtr);
      return;
    }//if
    jam();
    closeScanLab(signal, regTcPtr);
    return;
  }//if
  if (unlikely(scanPtr->scanKeyinfoFlag))
  {
    jam();
    // Inform API about keyinfo len as well
    read_len += sendKeyinfo20(signal, scanPtr, regTcPtr);
  }//if
  /* Add the row to the current batch */
  ndbrequire(scanPtr->m_curr_batch_size_rows < MAX_PARALLEL_OP_PER_SCAN);
  scanPtr->m_exec_direct_batch_size_words += read_len;
  scanPtr->m_curr_batch_size_bytes+= read_len * sizeof(Uint32);
  scanPtr->m_curr_batch_size_rows = rows + 1;
  scanPtr->m_last_row = last_row;

  const NodeBitmask& all = globalTransporterRegistry.get_status_slowdown();
  if (unlikely(!all.isclear()))
  {
    if (all.get(refToNode(scanptr.p->scanApiBlockref)))
    {
      /**
       * End scan batch if transporter-buffer are in slowdown state
       *
       */
      scanPtr->m_stop_batch = 1;

      c_scanSlowDowns++;
    }
  }

  if (scanPtr->check_scan_batch_completed() || last_row)
  {
    if (scanPtr->scanLockHold == ZTRUE)
    {
      jam();
      /* Locks are kept: report the batch and wait for the next request */
      scanPtr->scanState = ScanRecord::WAIT_SCAN_NEXTREQ;
      scanPtr->scan_lastSeen = __LINE__;
      sendScanFragConf(signal, ZFALSE, regTcPtr);
      return;
    }
    else
    {
      jam();
      /* Start releasing at index rows + 1, i.e. the row just added */
      scanPtr->scanReleaseCounter = rows + 1;
      scanReleaseLocksLab(signal, regTcPtr);
      return;
    }
  }
  /* Batch not complete: continue with the next row */
  if (unlikely(scanPtr->scanLockHold == ZTRUE))
  {
    jamDebug();
    scanPtr->scanFlag = NextScanReq::ZSCAN_NEXT;
    scanNextLoopLab(signal,
                    regTcPtr->clientConnectrec,
                    RNIL,
                    scanPtr,
                    fragptr.p);
    return;
  }
  else
  {
    jamDebug();
    /**
     * ZSCAN_NEXT_COMMIT is sent together with the operation pointer of
     * the row that was just added to the batch.
     */
    scanPtr->scanFlag = NextScanReq::ZSCAN_NEXT_COMMIT;
    Uint32 accOpPtr= get_acc_ptr_from_scan_record(scanPtr,
					   scanPtr->m_curr_batch_size_rows-1,
					   false);
    scanNextLoopLab(signal,
                    regTcPtr->clientConnectrec,
                    accOpPtr,
                    scanPtr,
                    fragptr.p);
    return;
  }
}//Dblqh::scanTupkeyConfLab()
15186 
15187 /* -------------------------------------------------------------------------
15188  *         ENTER TUPKEYREF WITH
15189  *               TC_CONNECTPTR,
15190  *               TERROR_CODE
15191  * -------------------------------------------------------------------------
15192  *       PRECONDITION:   TRANSACTION_STATE = SCAN_TUPKEY
15193  * ------------------------------------------------------------------------- */
/**
 * Handle a refused TUPKEYREQ (TUPKEYREF) for a row read by a scan.
 *
 * ZUSER_SEARCH_CONDITION_FALSE_CODE and ZNO_TUPLE_FOUND mean the row is
 * simply skipped and the scan continues. Any other error code is stored in
 * the operation record and the scan's locks are released. Works on the
 * block-level scanptr, fragptr and terrorCode in addition to its arguments.
 *
 * @param signal        Signal holding the TupKeyRef data; reused for the
 *                      follow-up calls.
 * @param tcConnectptr  Operation record owning the scan.
 */
void Dblqh::scanTupkeyRefLab(Signal* signal,
                             const TcConnectionrecPtr tcConnectptr)
{
  TcConnectionrec * const regTcPtr = tcConnectptr.p;
  ScanRecord * const scanPtr = scanptr.p;
  regTcPtr->transactionState = TcConnectionrec::SCAN_STATE_USED;

  /* Usage statistics are not maintained for LCP scans */
  if (!scanPtr->lcpScan)
  {
    Fragrecord::UsageStat& useStat = fragptr.p->m_useStat;
    ndbassert(useStat.m_scanFragReqCount > 0);

    useStat.m_scanRowsExamined++;

    const TupKeyRef* const ref =
      reinterpret_cast<const TupKeyRef*>(signal->getDataPtr());
    useStat.m_scanInstructionCount += ref->noExecInstructions;
  }

  /**
   * (Uint32)-1 means there is no ACC operation for this row; the assert
   * below states that this must not happen when ACC is the scanning block.
   */
  Uint32 rows = scanPtr->m_curr_batch_size_rows;
  Uint32 accOpPtr= get_acc_ptr_from_scan_record(scanPtr, rows, false);
  if (accOpPtr != (Uint32)-1)
  {
    c_acc->execACCKEY_ORD_no_ptr(signal, accOpPtr);
    jamEntryDebug();
  }
  else
  {
    ndbassert(refToBlock(scanPtr->scanBlockref) != DBACC);
    jamDebug();
  }
  if (unlikely(scanPtr->scanCompletedStatus == ZTRUE))
  {
    /* ---------------------------------------------------------------------
     *       STOP THE SCAN PROCESS IF THIS HAS BEEN REQUESTED.
     * --------------------------------------------------------------------- */
    if ((scanPtr->scanLockHold == ZTRUE) && rows)
    {
      jam();
      scanPtr->scanReleaseCounter = 1;
      scanReleaseLocksLab(signal, tcConnectptr.p);
      return;
    }//if
    jam();
    closeScanLab(signal, tcConnectptr.p);
    return;
  }//if
  /**
   * terrorCode is not a local variable.
   * NOTE(review): presumably it is set from the TUPKEYREF by the caller
   * before this function runs -- confirm.
   */
  if (unlikely((terrorCode != ZUSER_SEARCH_CONDITION_FALSE_CODE) &&
               (terrorCode != ZNO_TUPLE_FOUND)))
  {
#ifdef VM_TRACE
    ndbout << "Dblqh::scanTupkeyRefLab() aborting scan terrorCode="
           << terrorCode << endl;
#endif
    jamDebug();
    scanPtr->scanErrorCounter++;
    tcConnectptr.p->errorCode = terrorCode;

    if (scanPtr->scanLockHold == ZTRUE && rows > 0)
    {
      jam();
      scanPtr->scanReleaseCounter = 1;
    }
    else
    {
      jam();
      /* Count the failing row as part of the batch so that it is released */
      scanPtr->m_curr_batch_size_rows = rows + 1;
      scanPtr->scanReleaseCounter = rows + 1;
    }//if
    /* --------------------------------------------------------------------
     *       WE NEED TO RELEASE ALL LOCKS CURRENTLY
     *       HELD BY THIS SCAN.
     * -------------------------------------------------------------------- */
    scanReleaseLocksLab(signal, tcConnectptr.p);
    return;
  }//if
  Uint32 time_passed = cLqhTimeOutCount - tcConnectptr.p->tcTimer;
  if (unlikely(rows && time_passed > 1))
  {
  /* -----------------------------------------------------------------------
   *  WE NEED TO ENSURE THAT WE DO NOT SEARCH FOR THE NEXT TUPLE FOR A
   *  LONG TIME WHILE WE KEEP A LOCK ON A FOUND TUPLE. WE RATHER REPORT
   *  THE FOUND TUPLE IF FOUND TUPLES ARE RARE. If more than 10 ms passed we
   *  send the found tuples to the API.
   * ----------------------------------------------------------------------- */
    scanPtr->scanReleaseCounter = rows + 1;
    scanReleaseLocksLab(signal, tcConnectptr.p);
    return;
  }
  /**
   * The row is skipped: continue with ZSCAN_NEXT_COMMIT for its operation.
   * scan_acc_index is stepped back, presumably to free the slot that was
   * taken for this row -- TODO confirm against set_acc_ptr_in_scan_record.
   */
  scanPtr->scanFlag = NextScanReq::ZSCAN_NEXT_COMMIT;
  scanPtr->scan_acc_index--;
  scanNextLoopLab(signal,
                  tcConnectptr.p->clientConnectrec,
                  accOpPtr,
                  scanPtr,
                  fragptr.p);
}//Dblqh::scanTupkeyRefLab()
15291 
15292 /* -------------------------------------------------------------------------
15293  *   THE SCAN HAS BEEN COMPLETED. EITHER BY REACHING THE END OR BY COMMAND
15294  *   FROM THE APPLICATION OR BY SOME SORT OF ERROR CONDITION.
15295  * ------------------------------------------------------------------------- */
closeScanLab(Signal * signal,TcConnectionrec * regTcPtr)15296 void Dblqh::closeScanLab(Signal* signal, TcConnectionrec* regTcPtr)
15297 {
15298   FragrecordPtr regFragPtr = fragptr;
15299   ScanRecord * const scanPtr = scanptr.p;
15300   SimulatedBlock *block = scanPtr->scanBlock;
15301   const Uint32 sig0 = scanPtr->scanAccPtr;
15302   Fragrecord::FragStatus fragstatus = regFragPtr.p->fragStatus;
15303   ExecFunction f = scanPtr->scanFunction_NEXT_SCANREQ;
15304 
15305   scanPtr->scanState = ScanRecord::WAIT_CLOSE_SCAN;
15306   scanPtr->scan_lastSeen = __LINE__;
15307   scanPtr->scan_check_lcp_stop = 0;
15308   regTcPtr->transactionState = TcConnectionrec::SCAN_STATE_USED;
15309   signal->theData[1] = RNIL;
15310   signal->theData[2] = NextScanReq::ZSCAN_CLOSE;
15311   signal->theData[0] = sig0;
15312   ndbrequire(is_scan_ok(scanPtr, fragstatus));
15313   scanPtr->scanAccPtr = RNIL;
15314   block->EXECUTE_DIRECT_FN(f, signal);
15315 }//Dblqh::closeScanLab()
15316 
15317 /* -------------------------------------------------------------------------
15318  *       ENTER NEXT_SCANCONF
15319  * -------------------------------------------------------------------------
15320  *       PRECONDITION: SCAN_STATE = WAIT_CLOSE_SCAN
15321  * ------------------------------------------------------------------------- */
accScanCloseConfLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)15322 void Dblqh::accScanCloseConfLab(Signal* signal,
15323                                 const TcConnectionrecPtr tcConnectptr)
15324 {
15325   ScanRecord * const scanPtr = scanptr.p;
15326 
15327   /* Do we have another range to scan? */
15328   if((tcConnectptr.p->primKeyLen > 0) &&
15329      (scanPtr->scanCompletedStatus != ZTRUE))
15330   {
15331     jam();
15332     /* Start next range scan...*/
15333     m_scan_direct_count++;
15334     continueAfterReceivingAllAiLab(signal, tcConnectptr);
15335     return;
15336   }
15337   TcConnectionrec * const regTcPtr = tcConnectptr.p;
15338 
15339   const Uint32 sig0 = regTcPtr->tupConnectrec;
15340   const Uint32 sig1 = regTcPtr->tableref;
15341   const Uint32 sig2 = scanPtr->scanSchemaVersion;
15342   const Uint32 sig4 = scanPtr->scanStoredProcId;
15343   const Uint32 sig5 = scanPtr->scanApiBlockref;
15344 
15345   signal->theData[0] = sig0;
15346   signal->theData[1] = sig1;
15347   signal->theData[2] = sig2;
15348   signal->theData[3] = ZDELETE_STORED_PROC_ID;
15349   signal->theData[4] = sig4;
15350   signal->theData[5] = sig5;
15351   c_tup->execSTORED_PROCREQ(signal);
15352   scanPtr->scanStoredProcId = RNIL;
15353   ndbrequire(signal->theData[0] == 0);
15354 /* -------------------------------------------------------------------------
15355  *       ENTER STORED_PROCCONF
15356  * ------------------------------------------------------------------------- */
15357   tupScanCloseConfLab(signal, tcConnectptr);
15358 }//Dblqh::accScanCloseConfLab()
15359 
tupScanCloseConfLab(Signal * signal,TcConnectionrecPtr tcConnectptr)15360 void Dblqh::tupScanCloseConfLab(Signal* signal,
15361                                 TcConnectionrecPtr tcConnectptr)
15362 {
15363   if (scanptr.p->copyPtr != RNIL)
15364   {
15365     jamDebug();
15366     DEB_COPY(("(%u)tupScanCloseConfLab from COPY_FRAGREQ", instance()));
15367     tupCopyCloseConfLab(signal, tcConnectptr);
15368     return;
15369   }
15370   TcConnectionrec * const regTcPtr = tcConnectptr.p;
15371   if (regTcPtr->abortState == TcConnectionrec::NEW_FROM_TC) {
15372     jam();
15373     TcNodeFailRecordPtr tcNodeFailPtr;
15374     tcNodeFailPtr.i = regTcPtr->tcNodeFailrec;
15375     ptrCheckGuard(tcNodeFailPtr, ctcNodeFailrecFileSize, tcNodeFailRecord);
15376     tcNodeFailPtr.p->tcRecNow = tcConnectptr.i + 1;
15377     signal->theData[0] = ZLQH_TRANS_NEXT;
15378     signal->theData[1] = tcNodeFailPtr.i;
15379     sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
15380   } else if (regTcPtr->errorCode != 0) {
15381     jam();
15382     ScanFragRef * ref = (ScanFragRef*)&signal->theData[0];
15383     ref->senderData = regTcPtr->clientConnectrec;
15384     ref->transId1 = regTcPtr->transid[0];
15385     ref->transId2 = regTcPtr->transid[1];
15386     ref->errorCode = regTcPtr->errorCode;
15387     sendSignal(tcConnectptr.p->clientBlockref, GSN_SCAN_FRAGREF, signal,
15388 	 ScanFragRef::SignalLength, JBB);
15389   } else {
15390     jam();
15391     sendScanFragConf(signal, ZSCAN_FRAG_CLOSED, tcConnectptr.p);
15392   }//if
15393   handle_finish_scan(signal, tcConnectptr);
15394 }//Dblqh::tupScanCloseConfLab()
15395 
handle_finish_scan(Signal * signal,TcConnectionrecPtr tcConnectptr)15396 void Dblqh::handle_finish_scan(Signal* signal,
15397                                TcConnectionrecPtr tcConnectptr)
15398 {
15399   ScanRecordPtr restart;
15400   bool restart_flag = finishScanrec(signal, restart, tcConnectptr);
15401   if (likely(scanptr.p->scanState != ScanRecord::WAIT_START_QUEUED_SCAN))
15402   {
15403     releaseScanrec(signal);
15404   }
15405   else
15406   {
15407     /**
15408      * We are waiting for a START QUEUED SCAN signal (CONTINUEB).
15409      * Until this has arrived we cannot release the scan record.
15410      */
15411     jam();
15412     scanptr.p->scanState = ScanRecord::QUIT_START_QUEUE_SCAN;
15413   }
15414   tcConnectptr.p->tcScanRec = RNIL;
15415   deleteTransidHash(signal, tcConnectptr);
15416   releaseOprec(signal, tcConnectptr);
15417   releaseTcrec(signal, tcConnectptr);
15418   if (restart_flag)
15419   {
15420     jam();
15421     restart.p->scanState = ScanRecord::WAIT_START_QUEUED_SCAN;
15422     signal->theData[0] = ZSTART_QUEUED_SCAN;
15423     signal->theData[1] = restart.i;
15424     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
15425   }
15426 }
15427 
restart_queued_scan(Signal * signal,Uint32 scanPtrI)15428 void Dblqh::restart_queued_scan(Signal* signal, Uint32 scanPtrI)
15429 {
15430   ScanRecordPtr loc_scanptr;
15431   loc_scanptr.i = scanPtrI;
15432   ndbrequire(c_scanRecordPool.getValidPtr(loc_scanptr));
15433   if (loc_scanptr.p->scanState == ScanRecord::QUIT_START_QUEUE_SCAN)
15434   {
15435     jam();
15436     scanptr = loc_scanptr;
15437     releaseScanrec(signal);
15438     return;
15439   }
15440   ndbrequire(loc_scanptr.p->scanState == ScanRecord::WAIT_START_QUEUED_SCAN);
15441   ndbrequire(loc_scanptr.p->copyPtr == RNIL);
15442   setup_scan_pointers(scanPtrI);
15443   m_scan_direct_count = ZMAX_SCAN_DIRECT_COUNT - 8;
15444   // Hiding read only version in outer scope
15445   continueAfterReceivingAllAiLab(signal, m_tc_connect_ptr);
15446   return;
15447 }
15448 
15449 /* =========================================================================
15450  * =======              INITIATE SCAN RECORD                         =======
15451  *
15452  *       SUBROUTINE SHORT NAME = ISC
15453  * ========================================================================= */
initScanrec(const ScanFragReq * scanFragReq,Uint32 aiLen,const TcConnectionrecPtr tcConnectptr)15454 Uint32 Dblqh::initScanrec(const ScanFragReq* scanFragReq,
15455                           Uint32 aiLen,
15456                           const TcConnectionrecPtr tcConnectptr)
15457 {
15458   ScanRecord * const scanPtr = scanptr.p;
15459 
15460   const Uint32 reqinfo = scanFragReq->requestInfo;
15461   const Uint32 keyinfo = ScanFragReq::getKeyinfoFlag(reqinfo);
15462   const Uint32 scanLockHold = ScanFragReq::getHoldLockFlag(reqinfo);
15463   const Uint32 schemaVersion = scanFragReq->schemaVersion;
15464 
15465   scanPtr->scanAiLength = aiLen;
15466   scanPtr->copyPtr = RNIL;
15467   scanPtr->scanStoredProcId = RNIL;
15468   scanPtr->scanAccPtr = RNIL;
15469   scanPtr->scanNumber = ~0;
15470   m_scan_direct_count = ZMAX_SCAN_DIRECT_COUNT - 6;
15471   m_tot_scan_direct_count = 0;
15472   scanPtr->scanType = ScanRecord::SCAN;
15473   scanPtr->scanState = ScanRecord::SCAN_FREE;
15474   scanPtr->scanCompletedStatus = ZFALSE;
15475   scanPtr->scanFlag = ZFALSE;
15476   scanPtr->scanErrorCounter = 0;
15477   scanPtr->scan_lastSeen = __LINE__;
15478   scanPtr->scan_check_lcp_stop = 0;
15479   scanPtr->m_stop_batch = 0;
15480   scanPtr->m_curr_batch_size_rows = 0;
15481   scanPtr->m_curr_batch_size_bytes= 0;
15482   scanPtr->m_exec_direct_batch_size_words = 0;
15483   scanPtr->m_last_row = 0;
15484   /* Reserved scans keep their scan_acc_segments between uses */
15485   ndbrequire(scanPtr->scan_acc_segments == 0 || scanPtr->m_reserved);
15486   scanPtr->m_row_id.setNull();
15487   scanPtr->scanKeyinfoFlag = keyinfo;
15488   scanPtr->scanLockHold = scanLockHold;
15489   scanPtr->scanSchemaVersion = schemaVersion;
15490 
15491   const Uint32 scanLockMode = ScanFragReq::getLockMode(reqinfo);
15492   const Uint32 readCommitted = ScanFragReq::getReadCommittedFlag(reqinfo);
15493   const Uint32 rangeScan = ScanFragReq::getRangeScanFlag(reqinfo);
15494   const Uint32 prioAFlag = ScanFragReq::getPrioAFlag(reqinfo);
15495   const Uint32 firstMatch = ScanFragReq::getFirstMatchFlag(reqinfo);
15496 
15497   scanPtr->scanLockMode = scanLockMode;
15498   scanPtr->readCommitted = readCommitted;
15499   scanPtr->rangeScan = rangeScan;
15500   scanPtr->prioAFlag = prioAFlag;
15501   scanPtr->m_first_match_flag = firstMatch;
15502 
15503   const Uint32 descending = ScanFragReq::getDescendingFlag(reqinfo);
15504   Uint32 tupScan = ScanFragReq::getTupScanFlag(reqinfo);
15505   const Uint32 resultRef = scanFragReq->resultRef;
15506   const Uint32 tcPtrI = tcConnectptr.i;
15507 
15508   scanPtr->descending = descending;
15509   scanPtr->tupScan = tupScan;
15510   scanPtr->scanApiBlockref = resultRef;
15511   scanPtr->scanTcrec = tcPtrI;
15512   TcConnectionrec * const regTcPtr = tcConnectptr.p;
15513   const bool accScan = (rangeScan == 0) && (tupScan == 0);
15514 
15515   Uint32 blockRef;
15516   SimulatedBlock *block;
15517   ExecFunction f;
15518   if (accScan)
15519   {
15520     blockRef = caccBlockref;
15521     block = c_acc;
15522     f = c_acc->getExecuteFunction(GSN_NEXT_SCANREQ);
15523   }
15524   else if (! tupScan)
15525   {
15526     blockRef = ctuxBlockref;
15527     block = c_tux;
15528     f = c_tux->getExecuteFunction(GSN_NEXT_SCANREQ);
15529   }
15530   else
15531   {
15532     blockRef = ctupBlockref;
15533     block = c_tup;
15534     f = c_tup->getExecuteFunction(GSN_NEXT_SCANREQ);
15535   }
15536   scanPtr->scanBlockref = blockRef;
15537   scanPtr->scanBlock = block;
15538   scanPtr->scanFunction_NEXT_SCANREQ = f;
15539 
15540   const Uint32 lcpScan = ScanFragReq::getLcpScanFlag(reqinfo);
15541   const Uint32 statScan = ScanFragReq::getStatScanFlag(reqinfo);
15542   const Uint32 scanTcWaiting = cLqhTimeOutCount;
15543   const Uint32 scanApiOpPtr = scanFragReq->clientOpPtr;
15544   const Uint32 max_rows = scanFragReq->batch_size_rows;
15545   const Uint32 max_bytes = scanFragReq->batch_size_bytes;
15546 
15547   jamDebug();
15548   scanPtr->lcpScan = lcpScan;
15549   scanPtr->statScan = statScan;
15550   scanPtr->scanTcWaiting = scanTcWaiting;
15551   scanPtr->scanApiOpPtr = scanApiOpPtr;
15552   scanPtr->m_max_batch_size_rows = max_rows;
15553   scanPtr->m_max_batch_size_bytes = max_bytes;
15554 
15555   const Uint32 scanPrio = ScanFragReq::getScanPrio(reqinfo);
15556 
15557   if (unlikely(max_rows == 0 || (max_bytes > 0 && max_rows > max_bytes)))
15558   {
15559     jam();
15560     return ScanFragRef::ZWRONG_BATCH_SIZE;
15561   }
15562 
15563   if (ERROR_INSERTED(5057))
15564   {
15565     CLEAR_ERROR_INSERT_VALUE;
15566     return ScanFragRef::ZTOO_MANY_ACTIVE_SCAN_ERROR;
15567   }
15568 
15569 
15570   {
15571     DEBUG_RES_OWNER_GUARD(refToBlock(reference()) << 16 | 999);
15572 
15573     if (unlikely(!seize_acc_ptr_list(scanPtr, 0, max_rows)))
15574     {
15575       jam();
15576       return ScanFragRef::ZTOO_MANY_ACTIVE_SCAN_ERROR;
15577     }
15578     init_acc_ptr_list(scanPtr);
15579   }
15580 
15581   /**
15582    * Used for scan take over
15583    */
15584   FragrecordPtr tFragPtr;
15585   tFragPtr.i = fragptr.p->tableFragptr;
15586   c_fragment_pool.getPtr(tFragPtr);
15587   scanPtr->fragPtrI = fragptr.p->tableFragptr;
15588   prim_tab_fragptr = tFragPtr;
15589   c_tup->prepare_tab_pointers(prim_tab_fragptr.p->tupFragptr);
15590 
15591   /**
15592    * ACC scan uses 1 - (MAX_PARALLEL_SCANS_PER_FRAG - 1) inclusive  =  0-11
15593    * Range scans uses from MAX_PARALLEL_SCANS_PER_FRAG - MAX = 12-134
15594    * TUP scans uses from 135 - 252
15595    * The boundary between Range and TUP scans are configurable and is
15596    * set in variable c_max_parallel_scans_per_frag.
15597    */
15598 
15599   /**
15600    * ACC only supports 12 parallel scans per fragment (hard limit)
15601    * TUP/TUX does not have any such limit...but when scanning with keyinfo
15602    *         (for take-over) no more than 255 such scans can be active
15603    *         at a fragment (dur to 8 bit number in scan-keyinfo protocol)
15604    *
15605    * TODO: Make TUP/TUX limits depend on scanKeyinfoFlag (possibly with
15606    *       other config limit too)
15607    */
15608 
15609   Uint32 start, stop;
15610   Uint32 max_parallel_scans_per_frag = c_max_parallel_scans_per_frag;
15611   if (accScan)
15612   {
15613     jam();
15614     start = 0;
15615     stop = MAX_PARALLEL_SCANS_PER_FRAG;
15616   }
15617   else if (rangeScan)
15618   {
15619     jam();
15620     start = MAX_PARALLEL_SCANS_PER_FRAG;
15621     stop = start + max_parallel_scans_per_frag;
15622   }
15623   else
15624   {
15625     jam();
15626     ndbassert(tupScan);
15627     start = MAX_PARALLEL_SCANS_PER_FRAG + max_parallel_scans_per_frag;
15628     stop = start + max_parallel_scans_per_frag;
15629     if (stop > NR_ScanNo)
15630     {
15631       jam();
15632       stop = NR_ScanNo;
15633     }
15634   }
15635   ndbrequire((start < 32 * tFragPtr.p->m_scanNumberMask.Size) &&
15636              (stop < 32 * tFragPtr.p->m_scanNumberMask.Size));
15637 
15638   const BlockReference senderBlock = refToMain(regTcPtr->clientBlockref);
15639   Uint32 free;
15640 
15641   if (senderBlock == BACKUP)
15642   {
15643     /**
15644      * Both LCP scans and Backup scans have predefined scan numbers.
15645      * They will never be queued and so completing them will not
15646      * start any queued scans.
15647      */
15648     if (lcpScan)
15649     {
15650       jam();
15651       free = LCP_ScanNo;
15652       c_check_scanptr_i[ZLCP_CHECK_INDEX] = scanptr.i;
15653       c_check_scanptr_save_timer[ZLCP_CHECK_INDEX] = regTcPtr->tcTimer;
15654     }
15655     else
15656     {
15657       /* Backup scan */
15658       jam();
15659       free = Backup_ScanNo;
15660       c_check_scanptr_i[ZBACKUP_CHECK_INDEX] = scanptr.i;
15661       c_check_scanptr_save_timer[ZBACKUP_CHECK_INDEX] = regTcPtr->tcTimer;
15662     }
15663     ndbassert(tFragPtr.p->m_scanNumberMask.get(free));
15664   }
15665   else
15666   {
15667     ndbassert(!lcpScan);
15668     /*
15669       This error insert causes an SPJ index scan to be queued (see ndbinfo.test).
15670       Checking 5084 twice to ensure that the optimized build will see this as
15671       'testQueue = false' and not generate code to evaluate subsequent terms.
15672     */
15673     const bool testQueue = ERROR_INSERTED(5084) && rangeScan &&
15674       refToMain(resultRef)==DBSPJ && ERROR_INSERTED_CLEAR(5084);
15675 
15676     free = testQueue ? Fragrecord::ScanNumberMask::NotFound :
15677       tFragPtr.p->m_scanNumberMask.find(start);
15678 
15679     if (free == Fragrecord::ScanNumberMask::NotFound || free >= stop)
15680     {
15681       /**
15682        * stop isn't inclusive, so we allow only ids in the range
15683        * [ start, stop ) .
15684        */
15685       jam();
15686 
15687       if(scanPrio == 0)
15688       {
15689         jam();
15690         return ScanFragRef::ZTOO_MANY_ACTIVE_SCAN_ERROR;
15691       }
15692 
15693       /**
15694        * Put on queue
15695        */
15696       scanPtr->scanState = ScanRecord::IN_QUEUE;
15697       Local_ScanRecord_fifo queue(c_scanRecordPool,
15698                                   rangeScan != 0 ?
15699                                   fragptr.p->m_queuedScans :
15700                                   tupScan  != 0 ?
15701                                   fragptr.p->m_queuedTupScans :
15702                                   fragptr.p->m_queuedAccScans);
15703       queue.addLast(scanptr);
15704       fragptr.p->m_useStat.m_queuedScanCount++;
15705       return ZOK;
15706     }
15707   }
15708   scanPtr->scanNumber = free;
15709   tFragPtr.p->m_scanNumberMask.clear(free);// Update mask
15710 
15711   {
15712     Local_ScanRecord_list active(c_scanRecordPool, fragptr.p->m_activeScans);
15713     active.addFirst(scanptr);
15714   }
15715   if(scanPtr->scanKeyinfoFlag){
15716     jam();
15717 #if defined VM_TRACE || defined ERROR_INSERT
15718     ScanRecordPtr tmp;
15719     ndbrequire(!c_scanTakeOverHash.find(tmp, * scanptr.p));
15720 #endif
15721 #ifdef TRACE_SCAN_TAKEOVER
15722     ndbout_c("adding (%d %d) table: %d fragId: %d frag.i: %d tableFragptr: %d",
15723 	     scanPtr->scanNumber, scanPtr->fragPtrI,
15724 	     tabptr.i, scanFragReq->fragmentNoKeyLen & 0xFFFF,
15725 	     fragptr.i, fragptr.p->tableFragptr);
15726 #endif
15727     c_scanTakeOverHash.add(scanptr);
15728   }
15729   return ZOK;
15730 }
15731 
15732 /* =========================================================================
15733  * =======             INITIATE TC RECORD AT SCAN                    =======
15734  *
15735  *       SUBROUTINE SHORT NAME = IST
15736  * ========================================================================= */
initScanTc(const ScanFragReq * req,Uint32 transid1,Uint32 transid2,Uint32 fragId,Uint32 nodeId,Uint32 hashHi,const TcConnectionrecPtr tcConnectptr)15737 void Dblqh::initScanTc(const ScanFragReq* req,
15738                        Uint32 transid1,
15739                        Uint32 transid2,
15740                        Uint32 fragId,
15741                        Uint32 nodeId,
15742                        Uint32 hashHi,
15743                        const TcConnectionrecPtr tcConnectptr)
15744 {
15745   TcConnectionrec * const regTcPtr = tcConnectptr.p;
15746   regTcPtr->transid[0] = transid1;
15747   regTcPtr->transid[1] = transid2;
15748   regTcPtr->fragmentid = fragId;
15749   regTcPtr->nextReplica = nodeId;
15750   regTcPtr->tcHashKeyHi = hashHi;
15751 
15752   regTcPtr->m_reorg = (req == NULL) ?
15753     (Uint8)ScanFragReq::REORG_ALL :
15754     (Uint8)ScanFragReq::getReorgFlag(req->requestInfo);
15755   TablerecPtr tTablePtr;
15756   tTablePtr.i = tabptr.p->primaryTableId;
15757   ptrCheckGuard(tTablePtr, ctabrecFileSize, tablerec);
15758   regTcPtr->m_disk_table = tTablePtr.p->m_disk_table &&
15759     (!req || !ScanFragReq::getNoDiskFlag(req->requestInfo));
15760   tabptr.p->usageCountR++;
15761 
15762   regTcPtr->dirtyOp = 0; //dirtyOp-flag not used in scans
15763   regTcPtr->indTakeOver = ZFALSE; // not used in scan
15764   regTcPtr->lastReplicaNo = 0; // not used in scan
15765   regTcPtr->errorCode = 0;
15766   regTcPtr->currTupAiLen = 0;
15767   regTcPtr->reclenAiLqhkey = 0;
15768   regTcPtr->m_scan_curr_range_no = 0;
15769   regTcPtr->m_dealloc_state = TcConnectionrec::DA_IDLE;
15770   regTcPtr->m_dealloc_data.m_dealloc_ref_count = RNIL;
15771   regTcPtr->operation = ZREAD;
15772   regTcPtr->opExec = 1;
15773   regTcPtr->abortState = TcConnectionrec::ABORT_IDLE;
15774   // set TcConnectionrec::OP_SAVEATTRINFO so that a
15775   // "old" scan (short signals) update currTupAiLen which is checked
15776   // in scanAttrinfoLab
15777   regTcPtr->m_flags = TcConnectionrec::OP_SAVEATTRINFO;
15778   regTcPtr->commitAckMarker = RNIL;
15779   regTcPtr->activeCreat = Fragrecord::AC_NORMAL;
15780 
15781   {
15782     const Uint32 scanPtrI = scanptr.i;
15783     const Uint32 tabPtrI = tabptr.i;
15784     const Uint32 fragPtrI = fragptr.i;
15785     const Uint32 tcOprec = regTcPtr->clientConnectrec;
15786     const Uint32 tcBlockref = regTcPtr->clientBlockref;
15787 
15788     regTcPtr->tcScanRec = scanPtrI;
15789     regTcPtr->tableref = tabPtrI;
15790     regTcPtr->fragmentptr = fragPtrI;
15791     regTcPtr->tcOprec = tcOprec;
15792     regTcPtr->tcBlockref = tcBlockref;
15793   }
15794 }//Dblqh::initScanTc()
15795 
15796 /* =========================================================================
15797  * =======                       FINISH  SCAN RECORD                 =======
15798  *
15799  *       REMOVE SCAN RECORD FROM PER FRAGMENT LIST.
15800  * ========================================================================= */
finishScanrec(Signal * signal,ScanRecordPtr & restart_scan,const TcConnectionrecPtr tcConnectptr)15801 bool Dblqh::finishScanrec(Signal* signal,
15802                           ScanRecordPtr &restart_scan,
15803                           const TcConnectionrecPtr tcConnectptr)
15804 {
15805   ScanRecord * const scanPtr = scanptr.p;
15806   Uint32 reserved = scanPtr->m_reserved;
15807 
15808   if (reserved == 0)
15809   {
15810     release_acc_ptr_list(scanPtr);
15811   }
15812 
15813   Uint32 tupScan = scanPtr->tupScan;
15814   Uint32 rangeScan = scanPtr->rangeScan;
15815 
15816   if (scanPtr->scanState == ScanRecord::IN_QUEUE)
15817   {
15818     Local_ScanRecord_fifo queue(c_scanRecordPool,
15819                                        rangeScan != 0 ?
15820                                        fragptr.p->m_queuedScans :
15821                                        tupScan != 0 ?
15822                                        fragptr.p->m_queuedTupScans :
15823                                        fragptr.p->m_queuedAccScans);
15824     jam();
15825     ndbrequire(reserved == 0);
15826     queue.remove(scanptr);
15827     return false;
15828   }
15829 
15830   if (scanPtr->scanKeyinfoFlag)
15831   {
15832     jam();
15833     ScanRecordPtr tmp;
15834 #ifdef TRACE_SCAN_TAKEOVER
15835     ndbout_c("removing (%d %d)", scanPtr->scanNumber, scanPtr->fragPtrI);
15836 #endif
15837     c_scanTakeOverHash.remove(tmp, * scanPtr);
15838     ndbrequire(tmp.p == scanPtr);
15839   }
15840 
15841   {
15842     /**
15843      * DESIGN PATTERN DESCRIPTION:
15844      * ---------------------------
15845      * The scans object below is created on the stack, it is deleted
15846      * when we reach the end of the code block where it is created, to
15847      * avoid keeping the object around for too long we remove it from
15848      * the context by creating a code block around its use.
15849      *
15850      * This enables tail-call optimisations below in the code and also
15851      * avoids keeping the object around even when no longer needed which
15852      * can easily lead to false positives in asserts in the template
15853      * code generated by the object.
15854      */
15855     Local_ScanRecord_list scans(c_scanRecordPool, fragptr.p->m_activeScans);
15856     scans.remove(scanptr);
15857   }
15858 
15859   FragrecordPtr tFragPtr = prim_tab_fragptr;
15860 
15861   const Uint32 scanNumber = scanPtr->scanNumber;
15862   ndbrequire(!tFragPtr.p->m_scanNumberMask.get(scanNumber));
15863   ScanRecordPtr restart;
15864 
15865   {
15866     Local_ScanRecord_fifo queue(c_scanRecordPool,
15867                                        rangeScan != 0 ?
15868                                        fragptr.p->m_queuedScans :
15869                                        tupScan != 0 ?
15870                                        fragptr.p->m_queuedTupScans :
15871                                        fragptr.p->m_queuedAccScans);
15872     /**
15873      * Start of queued scans
15874      */
15875     if (likely(!queue.first(restart)) ||
15876          (scanNumber >= NR_ScanNo &&
15877           scanNumber <= Backup_ScanNo))
15878     {
15879       jamDebug();
15880 
15881       /**
15882        * LCP scan, NR scan, Backup scans won't start any queued scans since
15883        * no scanNumber useful for normal scans have been freed. Also come
15884        * here when no scans are queued.
15885        */
15886       tFragPtr.p->m_scanNumberMask.set(scanNumber);
15887       return false;
15888     }
15889 
15890     if(ERROR_INSERTED(5034))
15891     {
15892       jam();
15893       tFragPtr.p->m_scanNumberMask.set(scanNumber);
15894       return false;
15895     }
15896     ndbrequire(restart.p->scanState == ScanRecord::IN_QUEUE);
15897     queue.remove(restart);
15898   }
15899 
15900   restart.p->scanNumber = scanNumber;
15901   {
15902     Local_ScanRecord_list scans(c_scanRecordPool, fragptr.p->m_activeScans);
15903     scans.addFirst(restart);
15904   }
15905   if(restart.p->scanKeyinfoFlag)
15906   {
15907     jam();
15908 #if defined VM_TRACE || defined ERROR_INSERT
15909     ScanRecordPtr tmp;
15910     ndbrequire(!c_scanTakeOverHash.find(tmp, * restart.p));
15911 #endif
15912     c_scanTakeOverHash.add(restart);
15913 #ifdef TRACE_SCAN_TAKEOVER
15914     ndbout_c("adding-r (%d %d)", restart.p->scanNumber, restart.p->fragPtrI);
15915 #endif
15916   }
15917 
15918   /**
15919    * This state is a bit weird, but that what set in initScanRec
15920    */
15921   restart.p->scanState = ScanRecord::SCAN_FREE;
15922   if(tcConnectptr.p->transactionState == TcConnectionrec::SCAN_STATE_USED)
15923   {
15924     jam();
15925     restart_scan = restart;
15926     return true;
15927   }
15928   else
15929   {
15930     jam();
15931     ndbrequire(tcConnectptr.p->transactionState ==
15932                TcConnectionrec::WAIT_SCAN_AI);
15933     return false;
15934   }
15935 }//Dblqh::finishScanrec()
15936 
15937 /* =========================================================================
15938  * =======                       RELEASE SCAN RECORD                 =======
15939  *
15940  *       RELEASE A SCAN RECORD TO THE FREELIST.
15941  * ========================================================================= */
releaseScanrec(Signal * signal)15942 void Dblqh::releaseScanrec(Signal* signal)
15943 {
15944   jamDebug();
15945   ScanRecord * const scanPtr = scanptr.p;
15946   if (scanPtr->m_reserved == 0)
15947   {
15948     c_scanRecordPool.release(scanptr);
15949     checkPoolShrinkNeed(DBLQH_SCAN_RECORD_TRANSIENT_POOL_INDEX,
15950                         c_scanRecordPool);
15951     return;
15952   }
15953   if (scanptr.p->scanNumber == LCP_ScanNo)
15954   {
15955     jam();
15956     c_check_scanptr_i[ZLCP_CHECK_INDEX] = RNIL;
15957   }
15958   else if (scanptr.p->scanNumber == Backup_ScanNo)
15959   {
15960     jam();
15961     c_check_scanptr_i[ZBACKUP_CHECK_INDEX] = RNIL;
15962   }
15963   else
15964   {
15965     jam();
15966     ndbrequire(scanptr.p->scanNumber == NR_ScanNo);
15967     c_check_scanptr_i[ZCOPY_FRAGREQ_CHECK_INDEX] = RNIL;
15968   }
15969   init_release_scanrec(scanPtr);
15970   m_reserved_scans.addFirst(scanptr);
15971 }//Dblqh::releaseScanrec()
15972 
init_release_scanrec(ScanRecord * scanPtr)15973 void Dblqh::init_release_scanrec(ScanRecord* scanPtr)
15974 {
15975   scanPtr->scanState = ScanRecord::SCAN_FREE;
15976   scanPtr->scanType = ScanRecord::ST_IDLE;
15977   scanPtr->scanTcWaiting = 0;
15978   scanPtr->scan_lastSeen = __LINE__;
15979 }
15980 
15981 /* ------------------------------------------------------------------------
15982  * -------              SEND KEYINFO20 TO API                       -------
15983  *
15984  * Return: Length in number of Uint32 words
15985  * ------------------------------------------------------------------------  */
sendKeyinfo20(Signal * signal,ScanRecord * scanP,TcConnectionrec * tcConP)15986 Uint32 Dblqh::sendKeyinfo20(Signal* signal,
15987 			    ScanRecord * scanP,
15988 			    TcConnectionrec * tcConP)
15989 {
15990   ndbrequire(scanP->m_curr_batch_size_rows < MAX_PARALLEL_OP_PER_SCAN);
15991   KeyInfo20 * keyInfo = (KeyInfo20 *)&signal->theData[0];
15992 
15993   /**
15994    * Note that this code requires signal->theData to be big enough for
15995    * a entire key
15996    */
15997   const BlockReference ref = scanP->scanApiBlockref;
15998   const Uint32 scanOp = scanP->m_curr_batch_size_rows;
15999   Uint32 nodeId = refToNode(ref);
16000   const bool connectedToNode = getNodeInfo(nodeId).m_connected;
16001 #ifdef NOT_USED
16002   const Uint32 type = getNodeInfo(nodeId).m_type;
16003   const bool is_api= (type >= NodeInfo::API && type <= NodeInfo::REP);
16004   const bool old_dest= (getNodeInfo(nodeId).m_version < MAKE_VERSION(3,5,0));
16005 #endif
16006   const bool longable = true; // TODO is_api && !old_dest;
16007 
16008   if (isNdbMtLqh())
16009   {
16010     jam();
16011     nodeId = 0; // prevent execute direct
16012   }
16013 
16014   Uint32 * dst = keyInfo->keyData;
16015   dst += nodeId == getOwnNodeId() ? 0 : KeyInfo20::DataLength;
16016 
16017   /**
16018    * This is ugly :-(
16019    *  currently only SUMA receives KEYINFO20 inside kernel..
16020    *  and it's not really interested in the actual keyinfo,
16021    *  only the scanInfo_Node...so send only that and avoid
16022    *  messing with if's below...
16023    */
16024   Uint32 keyLen ;
16025   /* The blockReference ref could belong to an API node.
16026    * But the refToMain() is supposed to be used with only data nodes
16027    * as certain BlockReference numbers of API nodes will also
16028    * return true for 'refToMain(ref) == SUMA' which is not right.
16029    * So check the node id first before checking for the block */
16030   if (refToNode(ref) == getOwnNodeId() && refToMain(ref) == SUMA)
16031   {
16032     keyLen = 0;
16033   }
16034   else
16035   {
16036     keyLen = readPrimaryKeys(scanP, tcConP, dst);
16037   }
16038 
16039   Uint32 fragId = tcConP->fragmentid;
16040   keyInfo->clientOpPtr   = scanP->scanApiOpPtr;
16041   keyInfo->keyLen        = keyLen;
16042   keyInfo->scanInfo_Node =
16043     KeyInfo20::setScanInfo(scanOp, scanP->scanNumber) + (fragId << 20);
16044   keyInfo->transId1 = tcConP->transid[0];
16045   keyInfo->transId2 = tcConP->transid[1];
16046 
16047   Uint32 * src = signal->theData+25;
16048   if(connectedToNode)
16049   {
16050     jam();
16051 
16052     if (nodeId == getOwnNodeId())
16053     {
16054       EXECUTE_DIRECT(refToBlock(ref), GSN_KEYINFO20, signal,
16055                      KeyInfo20::HeaderLength + keyLen);
16056       jamEntry();
16057       return keyLen;
16058     }
16059     else
16060     {
16061       if(keyLen <= KeyInfo20::DataLength || !longable) {
16062 	while(keyLen > KeyInfo20::DataLength){
16063 	  jam();
16064 	  MEMCOPY_NO_WORDS(keyInfo->keyData, src, KeyInfo20::DataLength);
16065 	  sendSignal(ref, GSN_KEYINFO20, signal, 25, JBB);
16066 	  src += KeyInfo20::DataLength;;
16067 	  keyLen -= KeyInfo20::DataLength;
16068 	}
16069 
16070 	MEMCOPY_NO_WORDS(keyInfo->keyData, src, keyLen);
16071 	sendSignal(ref, GSN_KEYINFO20, signal,
16072 		   KeyInfo20::HeaderLength+keyLen, JBB);
16073 	return keyLen;
16074       }
16075 
16076       LinearSectionPtr ptr[3];
16077       ptr[0].p = src;
16078       ptr[0].sz = keyLen;
16079       sendSignal(ref, GSN_KEYINFO20, signal, KeyInfo20::HeaderLength,
16080 		 JBB, ptr, 1);
16081       return keyLen;
16082     }
16083   }
16084 
16085   /**
16086    * If this node does not have a direct connection
16087    * to the receiving node we want to send the signals
16088    * routed via the node that controls this read
16089    */
16090   Uint32 routeBlockref = tcConP->clientBlockref;
16091 
16092   if(keyLen < KeyInfo20::DataLength || !longable){
16093     jam();
16094 
16095     while (keyLen > (KeyInfo20::DataLength - 1)) {
16096       jam();
16097       MEMCOPY_NO_WORDS(keyInfo->keyData, src, KeyInfo20::DataLength - 1);
16098       keyInfo->keyData[KeyInfo20::DataLength-1] = ref;
16099       sendSignal(routeBlockref, GSN_KEYINFO20_R, signal, 25, JBB);
16100       src += KeyInfo20::DataLength - 1;
16101       keyLen -= KeyInfo20::DataLength - 1;
16102     }
16103 
16104     MEMCOPY_NO_WORDS(keyInfo->keyData, src, keyLen);
16105     keyInfo->keyData[keyLen] = ref;
16106     sendSignal(routeBlockref, GSN_KEYINFO20_R, signal,
16107 	       KeyInfo20::HeaderLength+keyLen+1, JBB);
16108     return keyLen;
16109   }
16110 
16111   keyInfo->keyData[0] = ref;
16112   LinearSectionPtr ptr[3];
16113   ptr[0].p = src;
16114   ptr[0].sz = keyLen;
16115   sendSignal(routeBlockref, GSN_KEYINFO20_R, signal,
16116 	     KeyInfo20::HeaderLength+1, JBB, ptr, 1);
16117   return keyLen;
16118 }
16119 
/**
 * Function used to send NEXT_SCANREQ, we need to decide whether to
 * continue in the same signal or sending a new signal and if sending
 * a new signal we need to decide whether B-level, Bounded delay or
 * even A-level signal.
 *
 * We need to ensure that we keep track of how many outstanding NEXT_SCANREQ
 * we have, each time we send a NEXT_SCANREQ with ZSCAN_NEXT we need to
 * increment this counter to ensure that we don't end up in calling too
 * deep into the stack which otherwise can happen when we use multiple
 * ranges.
 *
 * @param signal      Signal buffer. On entry theData[0..2] hold the
 *                    NEXT_SCANREQ words; they are either handed unchanged to
 *                    the direct call or repacked into an ACC_CHECK_SCAN
 *                    signal sent to this block.
 * @param block       Block object on which the request is executed directly.
 * @param f           Execute function invoked on 'block' for the direct call.
 * @param scanPtr     Scan record that the decision is made for.
 * @param clientPtrI  Passed on unchanged to
 *                    c_backup->get_max_words_per_scan_batch().
 */
void Dblqh::send_next_NEXT_SCANREQ(Signal* signal,
                                   SimulatedBlock* block,
                                   ExecFunction f,
                                   ScanRecord * const scanPtr,
                                   Uint32 clientPtrI)
{
  /**
   * NOTE: clientPtrI is in fact used further down, so this cast is a
   * no-op left in place.
   */
  (void)clientPtrI;
  /**
   * We have a number of different cases here. There are normal
   * scan operations, these always execute at B-level such that
   * they are scheduled among the other user level transactions.
   *
   * We also have prioritised scans, these could be scans for
   * LCPs, Backups, Node recovery or various ALTER TABLE activities.
   *
   * All internal scan activities are treated as prioritised scans.
   * These need to operate with a bounded delay. Therefore we send
   * these signals with a bounded delay signal (implemented through
   * a delayed signal with delay 0). These signals can also set the
   * priority flag to A-level to ensure that they process more rows
   * per scheduling slot than otherwise. This can be necessary at
   * very high loads when we scan for rather small rows.
   *
   * For efficiency reasons we try to execute a number of rows before
   * we send a new signal. We will never go beyond ZMAX_SCAN_DIRECT_COUNT
   * to avoid using too much of the CPU stack and also to avoid executing
   * for too long without putting ourselves back in the job buffer.
   *
   * We try to maintain the coding rule of NDB to never execute for more
   * than about 5-10 microseconds. Executing a 100 byte row scan on normal
   * CPUs in 2015 will take about 1 microsecond. If we instead scan 1000
   * bytes we estimate the time to be about 3 microseconds. So we use the
   * formula 750 ns of fixed cost per row + 8 ns per word. With this formula
   * we want to avoid that current cost has exceeded 5000 ns. If it has
   * we will schedule a signal rather than execute directly again. Given that
   * the exactness of the formula isn't perfect and that we want scheduling
   * to happen at least before 10 microseconds we will use a simplified
   * formula. We know that scan_direct_count must be between 0 and 3 when
   * coming here and not being immediately decided to send signal, so the
   * fixed part of the cost here is between 750 ns and 3000 ns. So we will
   * allow for up to 4000 ns of words before we decide to send a signal.
   * This means that when the number of words sent exceeds 500, then
   * we will send a signal.
   *
   * These calculations are valid for HW of 2015. Future HW is likely to be
   * faster and also we're likely to improve the efficiency of creating
   * LCPs by optimising the code. The coding rules for how long a signal
   * can execute should stay more or less constant over time. We had the
   * same coding rules also in the 1990s as we have now. However if we
   * can execute 300 MByte per second in a CPU rather than 150 MByte per
   * second then we can increase those limits. So effectively we should
   * not change the coding rules, but we should adapt our algorithms to
   * make use of the coding rules in an optimal manner. Not fixing this
   * when HW gets faster isn't likely to cause much problems given
   * that also signals from user transactions are likely to execute faster.
   * So mainly when we optimise the LCP code we should consider changing
   * those values and when we start allowing more computations due to
   * higher CPU throughput also in signals part of user transactions.
   */
#define ZABS_MAX_SCAN_DIRECT_COUNT 128
#define ZMICROS_TO_WAIT_IN_JBB_WITH_MARGIN 500
#define ZROWS_PER_MICRO 2

  Uint32 prioAFlag = scanPtr->prioAFlag;
  Uint32 cnf_max_scan_direct_count = c_max_scan_direct_count;
  /**
   * Limit on direct executions before the extended check below is made:
   * - reserved scan record (m_reserved == 1) with prioAFlag set:
   *   (2 * ZRESERVED_SCAN_BATCH_SIZE) + 2
   * - reserved scan record without prioAFlag: ZMAX_SCAN_DIRECT_COUNT
   * - any other scan record: c_max_scan_direct_count
   */
  Uint32 max_scan_direct_count = scanPtr->m_reserved == 1 ?
                (prioAFlag ? ((2 * ZRESERVED_SCAN_BATCH_SIZE) + 2) :
                   ZMAX_SCAN_DIRECT_COUNT) :
                cnf_max_scan_direct_count;
  do
  {
    Uint32 scan_direct_count = m_scan_direct_count;
    /**
     * Judging by the variable name the Backup block reports whether the
     * word budget of this direct-execution batch has been used up; the
     * exact policy lives in get_max_words_per_scan_batch (not shown here).
     */
    bool max_words_reached =
      c_backup->get_max_words_per_scan_batch(prioAFlag,
                                 scanPtr->m_exec_direct_batch_size_words,
                                 scanPtr->lcpScan,
                                 clientPtrI);
    if (scan_direct_count >= max_scan_direct_count ||
        max_words_reached)
    {
      jamDebug();
      /**
       * We will check whether it is ok to execute for a longer
       * time. The design rule that we are trying to achieve is
       * that at most 1 millisecond is what we are allowed to
       * wait in the job buffer for execution of a primary key
       * operation in normal load. The rules here strive to
       * ensure that this is ensured, with some leeway.
       *
       * Executing one row scan takes from a few hundred nanoseconds
       * up to a few microseconds. If the job buffer level is low we
       * can allow ourselves to run for longer time. The upper bound
       * used for the accumulated direct count is
       * ZABS_MAX_SCAN_DIRECT_COUNT (128). Continuing past the normal
       * limit would only occur if there are only few other activities
       * at the same time as well as that the scans don't send back
       * much data to the application.
       *
       * We will check the number of waiting JBB signals in the job
       * buffer. If this number is below 8 we use
       * ZABS_MAX_SCAN_DIRECT_COUNT as the limit. From 8 signals and up
       * the limit is gradually decreased, it is calculated as
       * (ZMICROS_TO_WAIT_IN_JBB_WITH_MARGIN * ZROWS_PER_MICRO) / jbb_level,
       * that is 1000 / jbb_level. With 8 signals in the job buffer the
       * limit is 125, with 10 it is 100, with 20 it is 50 and so forth.
       * The constants express 500 microseconds of acceptable wait and an
       * estimate of 2 rows executed per microsecond.
       */
      Uint32 jbb_level = getSignalsInJBB();
      Uint32 tot_scan_direct_count = m_tot_scan_direct_count +
                                       scan_direct_count;
      Uint32 tot_scan_limit = ZABS_MAX_SCAN_DIRECT_COUNT;
      if (jbb_level >= 8)
      {
        jamDebug();
        tot_scan_limit = (ZMICROS_TO_WAIT_IN_JBB_WITH_MARGIN *
                          ZROWS_PER_MICRO) / jbb_level;
      }
      if (!max_words_reached &&
          tot_scan_direct_count < tot_scan_limit)
      {
        /**
         * Only the row count limit was hit and the accumulated count is
         * still below the limit. Restart the per-round counter (it is
         * stored as scan_direct_count + 1 below) and remember the
         * accumulated total.
         */
        scan_direct_count = 1;
        m_tot_scan_direct_count = tot_scan_direct_count;
        /**
         * We will fall through here down to the code executing the next
         * NEXT_SCANREQ as a direct signal.
         */
      }
      else
      {
        /* Take a real-time break: continue through ACC_CHECK_SCAN to self */
        scanPtr->m_exec_direct_batch_size_words = 0;
        BlockReference resultRef = scanPtr->scanApiBlockref;

        /**
         * Repack the request into the 4 words of ACC_CHECK_SCAN:
         * the incoming words 1 and 2 move to words 2 and 3 (highest
         * index first so nothing is overwritten before it is copied),
         * word 0 becomes the scan record index and word 1 the GSN to
         * continue with. Incoming word 0 is not carried over.
         * NOTE(review): the block member scanptr is used here rather than
         * the scanPtr parameter; this assumes scanptr.p == scanPtr,
         * confirm against callers.
         */
        signal->theData[3] = signal->theData[2];
        signal->theData[2] = signal->theData[1];
        signal->theData[0] = scanptr.i;
        signal->theData[1] = GSN_NEXT_SCANREQ;
        if (!is_prioritised_scan(resultRef))
        {
          /* Normal user scans */
          jamDebug();
          scanPtr->scan_lastSeen = __LINE__;
          sendSignal(reference(), GSN_ACC_CHECK_SCAN, signal, 4, JBB);
          return;
        }
        /**
         * The branches below only record where the scan was last seen
         * (and notify the Backup block when an LCP scan is paused).
         * Every prioritised scan is sent with the same bounded delay
         * signal further down.
         */
        if (prioAFlag)
        {
          /* Prioritised scan at high load situation */
          if (scanPtr->lcpScan)
          {
            jamDebug();
            scanPtr->scan_lastSeen = __LINE__;
            c_backup->pausing_lcp(4, scan_direct_count);
          }
          else
          {
            jamDebug();
            scanPtr->scan_lastSeen = __LINE__;
          }
        }
        else
        {
          jamDebug();
          scanPtr->scan_lastSeen = __LINE__;
        }
        /* Prioritised scan operation */
        jamDebug();
        sendSignalWithDelay(reference(), GSN_ACC_CHECK_SCAN,
                            signal, BOUNDED_DELAY, 4);
        return;
      }
    }
    jamDebug();
    m_scan_direct_count = scan_direct_count + 1;
    /**
     * Mark that we are inside the direct call. The value is inspected
     * when the call returns to find out whether another round in this
     * loop is requested.
     */
    m_in_send_next_scan = 1;
    /**
     * To ensure that the scheduler behave differently with more
     * execute direct we report that an extra signal was executed
     * as part of this signal execution.
     */
    scanPtr->scan_lastSeen = __LINE__;
    signal->m_extra_signals++;
    jamDebug();
    block->EXECUTE_DIRECT_FN(f, signal);
    if (m_in_send_next_scan == 1)
    {
      /**
       * No more calls to perform
       */
      jamDebug();
      m_in_send_next_scan = 0;
      return;
    }
    /**
     * The flag was changed during the direct call, the only other value
     * expected is 2. Presumably this is set by a nested invocation that
     * wants the next request to be issued from this loop instead of from
     * a deeper stack frame (the setter is not visible here). Clear the
     * flag and go around again.
     */
    jamDebug();
    ndbassert(m_in_send_next_scan == 2);
    m_in_send_next_scan = 0;
  } while (1);
}
16332 
16333 /* ------------------------------------------------------------------------
16334  * -------        SEND SCAN_FRAGCONF TO TC THAT CONTROLS THE SCAN   -------
16335  *
16336  * ------------------------------------------------------------------------ */
sendScanFragConf(Signal * signal,Uint32 scanCompleted,const TcConnectionrec * const regTcPtr)16337 void Dblqh::sendScanFragConf(Signal* signal,
16338                              Uint32 scanCompleted,
16339                              const TcConnectionrec* const regTcPtr)
16340 {
16341   jamDebug();
16342   ScanRecord * const scanPtr = scanptr.p;
16343   const Uint32 completed_ops= scanPtr->m_curr_batch_size_rows;
16344   const Uint32 total_len= scanPtr->m_curr_batch_size_bytes / sizeof(Uint32);
16345 
16346   ndbassert((scanPtr->m_curr_batch_size_bytes % sizeof(Uint32)) == 0);
16347 
16348   ndbassert(scanPtr->scanTcWaiting != 0);
16349   scanPtr->scanTcWaiting = 0;
16350 
16351   if(ERROR_INSERTED(5037)){
16352     CLEAR_ERROR_INSERT_VALUE;
16353     return;
16354   }
16355 
16356   if (!scanPtr->lcpScan)
16357   {
16358     jamDebug();
16359     Fragrecord::UsageStat& useStat = fragptr.p->m_useStat;
16360     ndbassert(useStat.m_scanFragReqCount > 0);
16361 
16362     useStat.m_scanRowsReturned += scanPtr->m_curr_batch_size_rows;
16363     useStat.m_scanWordsReturned +=
16364       scanPtr->m_curr_batch_size_bytes/sizeof(Uint32);
16365   }
16366 
16367   if(!scanPtr->scanLockHold)
16368   {
16369     jamDebug();
16370     scanPtr->m_curr_batch_size_rows = 0;
16371     scanPtr->m_curr_batch_size_bytes= 0;
16372   }
16373   scanPtr->m_stop_batch = 0;
16374   ScanFragConf * conf = (ScanFragConf*)&signal->theData[0];
16375 #ifdef NOT_USED
16376   NodeId tc_node_id= refToNode(regTcPtr->clientBlockref);
16377 #endif
16378   const Uint32 senderData = regTcPtr->clientConnectrec;
16379   const Uint32 trans_id1= regTcPtr->transid[0];
16380   const Uint32 trans_id2= regTcPtr->transid[1];
16381   const BlockReference blockRef = regTcPtr->clientBlockref;
16382 
16383   conf->senderData = senderData;
16384   conf->completedOps = completed_ops;
16385   conf->fragmentCompleted = scanCompleted;
16386   conf->transId1 = trans_id1;
16387   conf->transId2 = trans_id2;
16388   conf->total_len= total_len;
16389 
16390   JobBufferLevel prio_level = JBB;
16391   if (scanPtr->prioAFlag)
16392   {
16393     jamDebug();
16394     prio_level = JBA;
16395   }
16396   sendSignal(blockRef, GSN_SCAN_FRAGCONF,
16397              signal, ScanFragConf::SignalLength, prio_level);
16398 }//Dblqh::sendScanFragConf()
16399 
16400 /* ######################################################################### */
16401 /* #######                NODE RECOVERY MODULE                       ####### */
16402 /*                                                                           */
16403 /* ######################################################################### */
16404 /*---------------------------------------------------------------------------*/
16405 /*                                                                           */
16406 /*   THIS MODULE IS USED WHEN A NODE HAS FAILED. IT PERFORMS A COPY OF A     */
16407 /*   FRAGMENT TO A NEW REPLICA OF THE FRAGMENT. IT DOES ALSO SHUT DOWN ALL   */
16408 /*   CONNECTIONS TO THE FAILED NODE.                                         */
16409 /*---------------------------------------------------------------------------*/
16410 Uint32
calculateHash(Uint32 tableId,const Uint32 * src)16411 Dblqh::calculateHash(Uint32 tableId, const Uint32* src)
16412 {
16413   jam();
16414   Uint64 Tmp[(MAX_KEY_SIZE_IN_WORDS*MAX_XFRM_MULTIPLY) >> 1];
16415   Uint32 keyPartLen[MAX_ATTRIBUTES_IN_INDEX];
16416   Uint32 keyLen = xfrm_key_hash(tableId, src,
16417                                 (Uint32*)Tmp, sizeof(Tmp) >> 2,
16418                                 keyPartLen);
16419   ndbrequire(keyLen);
16420 
16421   return md5_hash(Tmp, keyLen);
16422 }//Dblqh::calculateHash()
16423 
16424 /**
16425  * PREPARE COPY FRAG REQ
16426  */
16427 void
execPREPARE_COPY_FRAG_REQ(Signal * signal)16428 Dblqh::execPREPARE_COPY_FRAG_REQ(Signal* signal)
16429 {
16430   jamEntry();
16431   PrepareCopyFragReq req = *(PrepareCopyFragReq*)signal->getDataPtr();
16432 
16433   CRASH_INSERTION(5045);
16434 
16435   tabptr.i = req.tableId;
16436   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
16437 
16438   Uint32 max_page = RNIL;
16439 
16440   if (getOwnNodeId() != req.startingNodeId)
16441   {
16442     jam();
16443     /**
16444      * This is currently dead code...
16445      *   but is provided so we can impl. a better scan+delete on
16446      *   starting node wo/ having to change running node
16447      */
16448     ndbrequire(getOwnNodeId() == req.copyNodeId);
16449     c_tup->get_frag_info(req.tableId, req.fragId, &max_page);
16450 
16451     PrepareCopyFragConf* conf = (PrepareCopyFragConf*)signal->getDataPtrSend();
16452     conf->senderData = req.senderData;
16453     conf->senderRef = reference();
16454     conf->tableId = req.tableId;
16455     conf->fragId = req.fragId;
16456     conf->copyNodeId = req.copyNodeId;
16457     conf->startingNodeId = req.startingNodeId;
16458     conf->maxPageNo = max_page;
16459     conf->completedGci = 0;
16460     sendSignal(req.senderRef, GSN_PREPARE_COPY_FRAG_CONF,
16461                signal, PrepareCopyFragConf::SignalLength, JBB);
16462 
16463     return;
16464   }
16465 
16466   Uint32 completedGci = 0;
16467   /* Assuming 1 at a time... */
16468   c_fragCopyTable = req.tableId;
16469   c_fragCopyFrag = req.fragId;
16470   if (!c_copy_fragment_in_progress)
16471   {
16472     jam();
16473     sendSignal(NDBCNTR_REF, GSN_COPY_FRAG_IN_PROGRESS_REP, signal, 1, JBB);
16474   }
16475   c_copy_fragment_in_progress = true;
16476 
16477   if (c_fragmentCopyStart == 0)
16478   {
16479     c_fragmentCopyStart = NdbTick_CurrentMillisecond();
16480     g_eventLogger->info("LDM(%u): Starting to copy fragments.",
16481                         instance());
16482   }
16483   c_fragmentsCopied++;
16484   c_prepare_copy_fragreq_save = req;
16485 
16486   if (! DictTabInfo::isOrderedIndex(tabptr.p->tableType))
16487   {
16488     jam();
16489     DEB_COPY(("(%u)Copy tab(%u,%u) starts",
16490               instance(),
16491               c_fragCopyTable,
16492               c_fragCopyFrag));
16493     ndbrequire(getFragmentrec(signal, req.fragId));
16494 
16495     /**
16496      * We set AC_IGNORED to ensure we ignore transactions (but still
16497      * pass them on to the next replica) before we have seen the first
16498      * copy row arrive.
16499      *
16500      * Here we also get the number of pages that we have in the starting
16501      * node. This information is used by the live node to send
16502      * DELETE by ROWID for all rows that potentially could exist in pages
16503      * no longer existing on the live node.
16504      */
16505     fragptr.p->m_copy_started_state = Fragrecord::AC_IGNORED;
16506     fragptr.p->fragStatus = Fragrecord::ACTIVE_CREATION;
16507     fragptr.p->logFlag = Fragrecord::STATE_FALSE;
16508     completedGci = fragptr.p->m_completed_gci;
16509 
16510     c_tup->get_frag_info(req.tableId, req.fragId, &max_page);
16511     if ((c_copy_frag_halted &&
16512          c_copy_frag_halt_state == COPY_FRAG_HALT_STATE_IDLE) ||
16513          (!c_copy_frag_halted &&
16514           c_copy_frag_halt_state == COPY_FRAG_HALT_WAIT_FIRST_LQHKEYREQ))
16515     {
16516       jam();
16517       /**
16518        * Copy fragment process have been halted due to overload
16519        * of UNDO log. We will respond to this signal when
16520        * overload is gone.
16521        */
16522       DEB_COPY(("(%u)Halt after PREPARE_COPY_FRAG_REQ, tab(%u,%u)",
16523                 instance(),
16524                 req.tableId,
16525                 req.fragId));
16526       c_copy_frag_halted = true;
16527       c_copy_frag_halt_state = PREPARE_COPY_FRAG_IS_HALTED;
16528       return;
16529     }
16530   }
16531   send_prepare_copy_frag_conf(signal, req, completedGci, max_page);
16532 }
16533 
16534 void
send_prepare_copy_frag_conf(Signal * signal,PrepareCopyFragReq & req,Uint32 completedGci,Uint32 max_page)16535 Dblqh::send_prepare_copy_frag_conf(Signal *signal,
16536                                    PrepareCopyFragReq &req,
16537                                    Uint32 completedGci,
16538                                    Uint32 max_page)
16539 {
16540   PrepareCopyFragConf* conf = (PrepareCopyFragConf*)signal->getDataPtrSend();
16541   conf->senderData = req.senderData;
16542   conf->senderRef = reference();
16543   conf->tableId = req.tableId;
16544   conf->fragId = req.fragId;
16545   conf->copyNodeId = req.copyNodeId;
16546   conf->startingNodeId = req.startingNodeId;
16547   conf->maxPageNo = max_page;
16548   conf->completedGci = completedGci;
16549   sendSignal(req.senderRef, GSN_PREPARE_COPY_FRAG_CONF,
16550              signal, PrepareCopyFragConf::SignalLength, JBB);
16551 }
16552 
16553 /* *************************************** */
16554 /*  COPY_FRAGREQ: Start copying a fragment */
16555 /* *************************************** */
execCOPY_FRAGREQ(Signal * signal)16556 void Dblqh::execCOPY_FRAGREQ(Signal* signal)
16557 {
16558   jamEntry();
16559   const CopyFragReq * const copyFragReq = (CopyFragReq *)&signal->theData[0];
16560   tabptr.i = copyFragReq->tableId;
16561   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
16562   Uint32 i;
16563   const Uint32 fragId = copyFragReq->fragId;
16564   const Uint32 copyPtr = copyFragReq->userPtr;
16565   const Uint32 userRef = copyFragReq->userRef;
16566   const Uint32 nodeId = copyFragReq->nodeId;
16567   const Uint32 gci = copyFragReq->gci;
16568 
16569   ndbrequire(cnoActiveCopy < 3);
16570   ndbrequire(getFragmentrec(signal, fragId));
16571   ndbrequire(cfirstfreeTcConrec != RNIL);
16572   ndbrequire(fragptr.p->m_scanNumberMask.get(NR_ScanNo));
16573 
16574   Uint32 nodeCount = copyFragReq->nodeCount;
16575   NdbNodeBitmask nodemask;
16576   {
16577     ndbrequire(nodeCount <= MAX_REPLICAS);
16578     for (i = 0; i<nodeCount; i++)
16579       nodemask.set(copyFragReq->nodeList[i]);
16580   }
16581   Uint32 maxPage = copyFragReq->nodeList[nodeCount];
16582   Uint32 requestInfo = copyFragReq->nodeList[nodeCount + 1];
16583 
16584   if (signal->getLength() < CopyFragReq::SignalLength + nodeCount)
16585   {
16586     jam();
16587     requestInfo = CopyFragReq::CFR_TRANSACTIONAL;
16588   }
16589 
16590   if (requestInfo == CopyFragReq::CFR_NON_TRANSACTIONAL)
16591   {
16592     jam();
16593   }
16594   else
16595   {
16596     fragptr.p->fragDistributionKey = copyFragReq->distributionKey;
16597   }
16598   Uint32 key = fragptr.p->fragDistributionKey;
16599 
16600   if (DictTabInfo::isOrderedIndex(tabptr.p->tableType)) {
16601     jam();
16602     /**
16603      * Ordered index doesn't need to be copied
16604      */
16605     CopyFragConf * const conf = (CopyFragConf *)&signal->theData[0];
16606     conf->userPtr = copyPtr;
16607     conf->sendingNodeId = cownNodeid;
16608     conf->startingNodeId = nodeId;
16609     conf->tableId = tabptr.i;
16610     conf->fragId = fragId;
16611     sendSignal(userRef, GSN_COPY_FRAGCONF, signal,
16612 	       CopyFragConf::SignalLength, JBB);
16613     return;
16614   }//if
16615 
16616   {
16617     /* NR Scans allocate reserved scan records */
16618     Local_ScanRecord_list scans(c_scanRecordPool, fragptr.p->m_activeScans);
16619     ndbrequire(m_reserved_scans.first(scanptr));
16620     m_reserved_scans.remove(scanptr);
16621     scans.addFirst(scanptr);
16622   }
16623 
16624   DEB_COPY(("(%u)COPY_FRAGREQ tab(%u,%u)",
16625             instance(),
16626             tabptr.i,
16627             fragId));
16628 /* ------------------------------------------------------------------------- */
16629 // We keep track of how many operation records in ACC that has been booked.
16630 // Copy fragment has records always booked and thus need not book any. The
16631 // most operations in parallel use is the m_max_batch_size_rows.
16632 // This variable has to be set-up here since it is used by releaseScanrec
16633 // to unbook operation records in ACC.
16634 /* ------------------------------------------------------------------------- */
16635   ScanRecord * const scanPtr = scanptr.p;
16636   scanPtr->m_max_batch_size_rows = 0;
16637   scanPtr->rangeScan = 0;
16638   scanPtr->tupScan = 0;
16639   /**
16640    * Will always succeed since we can only call this once at a time for
16641    * NR operations, LCP scan operation and backup scan operation. All these
16642    * 3 operations have a reserved record always available for them.
16643    * The seizeTcrec would crash if this wasn't true and we've run out this
16644    * resource.
16645    */
16646   TcConnectionrecPtr tcConnectptr;
16647   seizeTcrec(tcConnectptr);
16648   tcConnectptr.p->clientBlockref = userRef;
16649   ndbrequire(Magic::check_ptr(tcConnectptr.p));
16650   /**
16651    * Remove implicit cast/usage of CopyFragReq
16652    */
16653   //initCopyrec(signal);
16654   {
16655     const Uint32 tcPtrI = tcConnectptr.i;
16656     const Uint32 fragPtrI = fragptr.i;
16657     const Uint32 schemaVersion = copyFragReq->schemaVersion;
16658     const BlockReference myRef = reference();
16659     const BlockReference tupRef = ctupBlockref;
16660 
16661     scanPtr->copyPtr = copyPtr;
16662     scanPtr->scanNodeId = nodeId;
16663     scanPtr->scanTcrec = tcPtrI;
16664     scanPtr->scanApiOpPtr = tcPtrI;
16665     scanPtr->fragPtrI = fragPtrI;
16666     scanPtr->scanSchemaVersion = schemaVersion;
16667     scanPtr->scanApiBlockref = myRef;
16668     scanPtr->scanBlockref = tupRef;
16669     scanPtr->scanBlock = c_tup;
16670     scanPtr->scanFunction_NEXT_SCANREQ =
16671       c_tup->getExecuteFunction(GSN_NEXT_SCANREQ);
16672     scanPtr->scanType = ScanRecord::COPY;
16673     scanPtr->scanCompletedStatus = ZFALSE;
16674     scanPtr->scanErrorCounter = 0;
16675     scanPtr->scanNumber = NR_ScanNo;
16676     scanPtr->scanKeyinfoFlag = 0; // Don't put into hash
16677     scanPtr->scanLockHold = ZFALSE;
16678     scanPtr->m_curr_batch_size_rows = 0;
16679     scanPtr->m_curr_batch_size_bytes= 0;
16680     scanPtr->m_exec_direct_batch_size_words = 0;
16681     scanPtr->readCommitted = 0;
16682     scanPtr->prioAFlag = ZFALSE;
16683     scanPtr->m_first_match_flag = 0;
16684     scanPtr->scanStoredProcId = RNIL;
16685     scanPtr->scanAccPtr = RNIL;
16686     scanPtr->scan_lastSeen = __LINE__;
16687     scanPtr->scan_check_lcp_stop = 0;
16688     m_scan_direct_count = ZMAX_SCAN_DIRECT_COUNT - 6;
16689     fragptr.p->m_scanNumberMask.clear(NR_ScanNo);
16690     c_check_scanptr_i[ZCOPY_FRAGREQ_CHECK_INDEX] = scanptr.i;
16691     c_check_scanptr_save_timer[ZCOPY_FRAGREQ_CHECK_INDEX] =
16692       tcConnectptr.p->tcTimer;
16693   }
16694 
16695   initScanTc(0,
16696              0,
16697              (DBLQH << 20) + (cownNodeid << 8),
16698              fragId,
16699              copyFragReq->nodeId,
16700              0,
16701              tcConnectptr);
16702   /**
16703    * Copy fragment always performed on primary table fragment,
16704    * never applied on an ordered index.
16705    */
16706   prim_tab_fragptr = fragptr;
16707   c_tup->prepare_tab_pointers(prim_tab_fragptr.p->tupFragptr);
16708   /* Save TC connect record used */
16709   c_tc_connect_rec_copy_frag = tcConnectptr.i;
16710 
16711   cactiveCopy[cnoActiveCopy] = fragptr.i;
16712   cnoActiveCopy++;
16713 
16714   {
16715     TcConnectionrec * const regTcPtr = tcConnectptr.p;
16716     const Uint32 tcPtrI = tcConnectptr.i;
16717 
16718     regTcPtr->schemaVersion = scanPtr->scanSchemaVersion;
16719     regTcPtr->copyCountWords = 0;
16720     regTcPtr->tcOprec = tcPtrI;
16721     regTcPtr->savePointId = gci;
16722     regTcPtr->applRef = 0;
16723     regTcPtr->transactionState = TcConnectionrec::SCAN_STATE_USED;
16724   }
16725 
16726   if (! nodemask.isclear())
16727   {
16728     ndbrequire(nodemask.get(getOwnNodeId()));
16729     ndbrequire(nodemask.get(nodeId)); // cpy dest
16730     nodemask.clear(getOwnNodeId());
16731     nodemask.clear(nodeId);
16732 
16733     UpdateFragDistKeyOrd*
16734       ord = (UpdateFragDistKeyOrd*)signal->getDataPtrSend();
16735     ord->tableId = tabptr.i;
16736     ord->fragId = fragId;
16737     ord->fragDistributionKey = key;
16738     i = 0;
16739     while ((i = nodemask.find(i+1)) != NdbNodeBitmask::NotFound)
16740     {
16741       sendSignal(calcInstanceBlockRef(number(), i),
16742                  GSN_UPDATE_FRAG_DIST_KEY_ORD,
16743                  signal, UpdateFragDistKeyOrd::SignalLength, JBB);
16744     }
16745   }
16746 
16747   {
16748     AccScanReq * req = (AccScanReq*)&signal->theData[0];
16749     Uint32 sig_request_info = 0;
16750     AccScanReq::setCopyFragScanFlag(sig_request_info, 1);
16751     if (requestInfo == CopyFragReq::CFR_TRANSACTIONAL)
16752     {
16753       jam();
16754       /**
16755        * An node-recovery scan, is shared lock
16756        *   and may not perform disk-scan (as it then can miss uncomitted
16757        *   inserts)
16758        */
16759       //AccScanReq::setLockMode(sig_request_info, 0);
16760       //AccScanReq::setReadCommittedFlag(sig_request_info, 0);
16761       AccScanReq::setNRScanFlag(sig_request_info, 1);
16762       AccScanReq::setNoDiskScanFlag(sig_request_info, 1);
16763     }
16764     else
16765     {
16766       jam();
16767       /**
16768        * The non-transaction scan is really only a "normal" tup scan
16769        *   committed read.
16770        */
16771       //AccScanReq::setLockMode(sig_request_info, 0);
16772       AccScanReq::setReadCommittedFlag(sig_request_info, 1);
16773       AccScanReq::setNoDiskScanFlag(sig_request_info, 1);
16774       scanPtr->readCommitted = 1;
16775     }
16776     req->requestInfo = sig_request_info;
16777     scanPtr->scanState = ScanRecord::WAIT_ACC_COPY;
16778     const Uint32 scanPtrI = scanptr.i;
16779     const Uint32 my_ref = cownref;
16780     const Uint32 tabPtrI = tabptr.i;
16781     TcConnectionrec * const regTcPtr = tcConnectptr.p;
16782 
16783     req->senderData = scanPtrI;
16784     req->senderRef = my_ref;
16785     req->tableId = tabPtrI;
16786     req->fragmentNo = fragId;
16787 
16788     SimulatedBlock *block = scanPtr->scanBlock;
16789     const Uint32 transId1 = regTcPtr->transid[0];
16790     const Uint32 transId2 = regTcPtr->transid[1];
16791     const Uint32 savePointId = regTcPtr->savePointId;
16792     ExecFunction f = block->getExecuteFunction(GSN_ACC_SCANREQ);
16793 
16794     req->transId1 = transId1;
16795     req->transId2 = transId2;
16796     req->savePointId = savePointId;
16797     req->maxPage = maxPage;
16798 
16799     block->EXECUTE_DIRECT_FN(f, signal);
16800   }
16801   ndbrequire(signal->theData[8] == 0)
16802   /* ACC_SCANCONF */
16803   jamEntry();
16804   accScanConfCopyLab(signal);
16805   return;
16806 }//Dblqh::execCOPY_FRAGREQ()
16807 
16808 void
execUPDATE_FRAG_DIST_KEY_ORD(Signal * signal)16809 Dblqh::execUPDATE_FRAG_DIST_KEY_ORD(Signal * signal)
16810 {
16811   jamEntry();
16812   UpdateFragDistKeyOrd* ord =(UpdateFragDistKeyOrd*)signal->getDataPtr();
16813 
16814   tabptr.i = ord->tableId;
16815   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
16816   ndbrequire(getFragmentrec(signal, ord->fragId));
16817   fragptr.p->fragDistributionKey = ord->fragDistributionKey;
16818 }
16819 
accScanConfCopyLab(Signal * signal)16820 void Dblqh::accScanConfCopyLab(Signal* signal)
16821 {
16822   ScanRecord * const scanPtr = scanptr.p;
16823   AccScanConf * const accScanConf = (AccScanConf *)&signal->theData[0];
16824   TcConnectionrecPtr tcConnectptr;
16825   tcConnectptr.i = scanPtr->scanTcrec;
16826   ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
16827 /*--------------------------------------------------------------------------*/
16828 /*  PRECONDITION: SCAN_STATE = WAIT_ACC_COPY                                */
16829 /*--------------------------------------------------------------------------*/
16830   if (accScanConf->flag == AccScanConf::ZEMPTY_FRAGMENT) {
16831     jam();
16832 /*---------------------------------------------------------------------------*/
16833 /*   THE FRAGMENT WAS EMPTY.                                                 */
16834 /*   REPORT SUCCESSFUL COPYING.                                              */
16835 /*---------------------------------------------------------------------------*/
16836     tupCopyCloseConfLab(signal, tcConnectptr);
16837     return;
16838   }//if
16839   TcConnectionrec * const regTcPtr = tcConnectptr.p;
16840   {
16841     const Uint32 accPtr = accScanConf->accPtr;
16842     const Uint32 sig0 = regTcPtr->tupConnectrec;
16843     const Uint32 sig1 = regTcPtr->tableref;
16844     const Uint32 sig2 = scanPtr->scanSchemaVersion;
16845     const Uint32 sig5 = scanPtr->scanApiBlockref;
16846     scanPtr->scanAccPtr = accPtr;
16847     signal->theData[0] = sig0;
16848     signal->theData[1] = sig1;
16849     signal->theData[2] = sig2;
16850     signal->theData[3] = ZSTORED_PROC_COPY;
16851 // theData[4] is not used in TUP with ZSTORED_PROC_COPY
16852     signal->theData[5] = sig5;
16853     c_tup->execSTORED_PROCREQ(signal);
16854     jamEntry();
16855   }
16856 /*---------------------------------------------------------------------------*/
16857 /*   ENTER STORED_PROCCONF WITH                                              */
16858 /*     0 success == CONF, 1 failure == REF                                   */
16859 /*     STORED_PROC_ID                                                        */
16860 /*---------------------------------------------------------------------------*/
16861   ndbrequire(signal->theData[0] == 0);
16862   scanPtr->scanStoredProcId = signal->theData[1];
16863   scanPtr->scanAiLength = signal->theData[2];
16864   c_tup->copyAttrinfo(scanPtr->scanStoredProcId);
16865 
16866   if (scanPtr->scanCompletedStatus == ZTRUE)
16867   {
16868     jam();
16869 /*---------------------------------------------------------------------------*/
16870 /*   THE COPY PROCESS HAVE BEEN COMPLETED, MOST LIKELY DUE TO A NODE FAILURE.*/
16871 /*---------------------------------------------------------------------------*/
16872     closeCopyLab(signal, regTcPtr);
16873     return;
16874   }//if
16875   fragptr.i = regTcPtr->fragmentptr;
16876   c_fragment_pool.getPtr(fragptr);
16877   scanPtr->scanState = ScanRecord::WAIT_NEXT_SCAN_COPY;
16878   ndbrequire(fragptr.p->fragStatus == Fragrecord::FSACTIVE);
16879 
16880   /**
16881    * Start sending ROWID for all operations from now on
16882    */
16883   fragptr.p->m_copy_started_state = Fragrecord::AC_NR_COPY;
16884   if (ERROR_INSERTED(5714))
16885   {
16886     ndbout_c("Starting copy of tab(%u,%u)",
16887              fragptr.p->tabRef, fragptr.p->fragId);
16888   }
16889 
16890   if (false && fragptr.p->tabRef > 4)
16891   {
16892     ndbout_c("STOPPING COPY X = [ %d %d %d %d ]",
16893 	     refToBlock(scanPtr->scanBlockref),
16894 	     scanPtr->scanAccPtr, RNIL, NextScanReq::ZSCAN_NEXT);
16895 
16896     /**
16897      * RESTART: > DUMP 7020 332 X
16898      */
16899     return;
16900   }
16901   {
16902     /**
16903      * This is always the first call to send_next_NEXT_SCANREQ.
16904      * This is always a full partition scan and can thus not
16905      * have multiple ranges and thus cannot be restarted.
16906      */
16907     ndbassert(m_in_send_next_scan == 0);
16908     const Uint32 sig0 = scanPtr->scanAccPtr;
16909     SimulatedBlock *block = scanPtr->scanBlock;
16910     ExecFunction f = scanPtr->scanFunction_NEXT_SCANREQ;
16911 
16912     signal->theData[1] = RNIL;
16913     signal->theData[2] = NextScanReq::ZSCAN_NEXT;
16914     signal->theData[0] = sig0;
16915     scanPtr->scanState = ScanRecord::WAIT_NEXT_SCAN_COPY;
16916     scanPtr->scan_lastSeen = __LINE__;
16917     send_next_NEXT_SCANREQ(signal,
16918                            block,
16919                            f,
16920                            scanPtr,
16921                            regTcPtr->clientConnectrec);
16922   }
16923 }//Dblqh::accScanConfCopyLab()
16924 
16925 /*---------------------------------------------------------------------------*/
16926 /*       ENTER NEXT_SCANCONF WITH                                            */
16927 /*         SCANPTR,                                                          */
16928 /*         TFRAGID,                                                          */
16929 /*         TACC_OPPTR,                                                       */
16930 /*         TLOCAL_KEY1,                                                      */
16931 /*         TLOCAL_KEY2,                                                      */
16932 /*         TKEY_LENGTH,                                                      */
16933 /*         TKEY1,                                                            */
16934 /*         TKEY2,                                                            */
16935 /*         TKEY3,                                                            */
16936 /*         TKEY4                                                             */
16937 /*---------------------------------------------------------------------------*/
16938 /*       PRECONDITION: SCAN_STATE = WAIT_NEXT_SCAN_COPY                      */
16939 /*---------------------------------------------------------------------------*/
nextScanConfCopyLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)16940 void Dblqh::nextScanConfCopyLab(Signal* signal,
16941                                 const TcConnectionrecPtr tcConnectptr)
16942 {
16943   NextScanConf * const nextScanConf = (NextScanConf *)&signal->theData[0];
16944   if (nextScanConf->fragId == RNIL) {
16945     jam();
16946 /*---------------------------------------------------------------------------*/
16947 /*   THERE ARE NO MORE TUPLES TO FETCH. WE NEED TO CLOSE                     */
16948 /*   THE COPY IN ACC AND DELETE THE STORED PROCEDURE IN TUP                  */
16949 /*---------------------------------------------------------------------------*/
16950     if (tcConnectptr.p->copyCountWords == 0) {
16951       closeCopyLab(signal, tcConnectptr.p);
16952       return;
16953     }//if
16954 /*---------------------------------------------------------------------------*/
16955 // Wait until copying is completed also at the starting node before reporting
16956 // completion. Signal completion through scanCompletedStatus-flag.
16957 /*---------------------------------------------------------------------------*/
16958     scanptr.p->scan_check_lcp_stop = 0;
16959     scanptr.p->scanCompletedStatus = ZTRUE;
16960     scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY;
16961     scanptr.p->scan_lastSeen = __LINE__;
16962     if (ERROR_INSERTED(5043))
16963     {
16964       CLEAR_ERROR_INSERT_VALUE;
16965       tcConnectptr.p->copyCountWords = ~0;
16966       signal->theData[0] = 9999;
16967       sendSignal(numberToRef(CMVMI, scanptr.p->scanNodeId),
16968 		 GSN_NDB_TAMPER, signal, 1, JBA);
16969     }
16970     return;
16971   }//if
16972 
16973   TcConnectionrec * tcConP = tcConnectptr.p;
16974 
16975   tcConP->m_use_rowid = true;
16976   /**
16977    * For copy fragment scans we are scanning from TUP, TUP returns
16978    * row ids in its scan, so we can safely pass it on here to an
16979    * operation record knowing that it is a row id we are passing.
16980    */
16981   tcConP->m_row_id = scanptr.p->m_row_id;
16982 
16983   scanptr.p->m_curr_batch_size_rows++;
16984 
16985   if (signal->getLength() == NextScanConf::SignalLengthNoKeyInfo)
16986   {
16987     jam();
16988     /**
16989      * This code handles the case in Node recovery where we have found a record
16990      * which didn't exist in this live node, it might however require that the
16991      * starting node deletes it. There is no primary key information since the
16992      * tuple was deleted and we only keep the fixed size part of the row after
16993      * deletion.
16994      *
16995      * This performs DELETE by ROWID, if there is a row at this ROWID in the
16996      * starting node it will also know the primary key to delete.
16997      */
16998     scanptr.p->scan_check_lcp_stop = 0;
16999     ndbrequire(nextScanConf->accOperationPtr == RNIL);
17000     initCopyTc(signal, ZDELETE, tcConP);
17001     set_acc_ptr_in_scan_record(scanptr.p, 0, RNIL);
17002     tcConP->gci_hi = nextScanConf->gci;
17003     tcConP->gci_lo = 0;
17004 
17005     tcConP->primKeyLen = 0;
17006     tcConP->totSendlenAi = 0;
17007     tcConP->connectState = TcConnectionrec::COPY_CONNECTED;
17008 
17009 /*---------------------------------------------------------------------------*/
17010 // To avoid using up to many operation records in ACC we will increase the
17011 // constant to ensure that we never send more than 40 records at a time.
17012 // This is where the constant 56 comes from. For long records this constant
17013 // will not matter that much. The current maximum is 6000 words outstanding
17014 // (including a number of those 56 words not really sent). We also have to
17015 // ensure that there are never more simultaneous usage of these operation
17016 // records to ensure that node recovery does not fail because of simultaneous
17017 // scanning.
17018 /*---------------------------------------------------------------------------*/
17019     UintR TnoOfWords = 8;
17020     TnoOfWords = TnoOfWords + MAGIC_CONSTANT;
17021     TnoOfWords = TnoOfWords + (TnoOfWords >> 2);
17022 
17023     /*-----------------------------------------------------------------
17024      * NOTE for transid1!
17025      * Transid1 in the tcConnection record is used load regulate the
17026      * copy(node recovery) process.
17027      * The number of outstanding words are written in the transid1
17028      * variable. This will be sent to the starting node in the
17029      * LQHKEYREQ signal and when the answer is returned in the LQHKEYCONF
17030      * we can reduce the number of outstanding words and check to see
17031      * if more LQHKEYREQ signals should be sent.
17032      *
17033      * However efficient this method is rather unsafe in such way that
17034      * it overwrites the transid1 original data.
17035      *
17036      * Also see TR 587.
17037      *----------------------------------------------------------------*/
17038     tcConP->transid[0] = TnoOfWords; // Data overload, see note!
17039     ndbrequire(!c_copy_frag_live_node_halted);
17040     packLqhkeyreqLab(signal, tcConnectptr);
17041     tcConP->copyCountWords += TnoOfWords;
17042     scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY;
17043     if (tcConP->copyCountWords < cmaxWordsAtNodeRec)
17044     {
17045       nextRecordCopy(signal, tcConnectptr);
17046       return;
17047     }
17048     return;
17049   }
17050   else
17051   {
17052     // If accOperationPtr == RNIL no record was returned by ACC
17053     if (nextScanConf->accOperationPtr == RNIL) {
17054       jam();
17055       scanptr.p->scan_lastSeen = __LINE__;
17056       signal->theData[0] = scanptr.i;
17057       signal->theData[1] = GSN_ACC_CHECK_SCAN;
17058       signal->theData[2] = AccCheckScan::ZCHECK_LCP_STOP;
17059       sendSignal(reference(), GSN_ACC_CHECK_SCAN, signal, 3, JBB);
17060       return;
17061     }
17062 
17063     initCopyTc(signal, ZINSERT, tcConP);
17064     set_acc_ptr_in_scan_record(scanptr.p, 0, nextScanConf->accOperationPtr);
17065 
17066     Fragrecord* fragPtrP= fragptr.p;
17067     scanptr.p->scanState = ScanRecord::WAIT_TUPKEY_COPY;
17068     scanptr.p->scan_check_lcp_stop = 0;
17069     tcConP->transactionState = TcConnectionrec::COPY_TUPKEY;
17070     if(tcConP->m_disk_table)
17071     {
17072       scanptr.p->scan_lastSeen = __LINE__;
17073       next_scanconf_load_diskpage(signal, scanptr.p, tcConnectptr,fragPtrP);
17074       return;
17075     }
17076     else
17077     {
17078       next_scanconf_tupkeyreq(signal, scanptr.p, tcConP, fragPtrP, RNIL);
17079       return;
17080     }
17081   }
17082 }//Dblqh::nextScanConfCopyLab()
17083 
17084 
17085 /*---------------------------------------------------------------------------*/
17086 /*   USED IN COPYING OPERATION TO RECEIVE ATTRINFO FROM TUP.                 */
17087 /*---------------------------------------------------------------------------*/
17088 /* ************>> */
17089 /*  TRANSID_AI  > */
17090 /* ************>> */
execTRANSID_AI(Signal * signal)17091 void Dblqh::execTRANSID_AI(Signal* signal)
17092 {
17093   jamEntry();
17094   /* TransID_AI received from local TUP, data is linear inline in
17095    * signal buff
17096    */
17097   TcConnectionrecPtr tcConnectptr;
17098   tcConnectptr.i = signal->theData[0];
17099   ndbrequire(tcConnect_pool.getUncheckedPtrRW(tcConnectptr));
17100   Uint32 length = signal->length() - TransIdAI::HeaderLength;
17101   TcConnectionrec::TransactionState transState =
17102     tcConnectptr.p->transactionState;
17103   Uint32 * src = &signal->theData[ TransIdAI::HeaderLength ];
17104   ndbrequire(Magic::check_ptr(tcConnectptr.p));
17105   ndbrequire(transState == TcConnectionrec::COPY_TUPKEY);
17106   bool ok= appendToSection(tcConnectptr.p->attrInfoIVal,
17107                            src,
17108                            length);
17109   if (unlikely(! ok))
17110   {
17111     jam();
17112     tcConnectptr.p->errorCode = ZGET_ATTRINBUF_ERROR;
17113   }
17114 }//Dblqh::execTRANSID_AI()
17115 
17116 /*--------------------------------------------------------------------------*/
17117 /*     ENTER TUPKEYCONF WITH                                                */
17118 /*          TC_CONNECTPTR,                                                  */
17119 /*          TDATA2,                                                         */
17120 /*          TDATA3,                                                         */
17121 /*          TDATA4,                                                         */
17122 /*          TDATA5                                                          */
17123 /*--------------------------------------------------------------------------*/
17124 /*  PRECONDITION:   TRANSACTION_STATE = COPY_TUPKEY                         */
17125 /*--------------------------------------------------------------------------*/
copyTupkeyRefLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)17126 void Dblqh::copyTupkeyRefLab(Signal* signal,
17127                              const TcConnectionrecPtr tcConnectptr)
17128 {
17129   //const TupKeyRef * tupKeyRef = (TupKeyRef *)signal->getDataPtr();
17130   ScanRecord* scanP = scanptr.p;
17131   if (scanP->readCommitted == 0)
17132   {
17133     jam();
17134     ndbabort(); // Should not be possibe...we read with lock
17135   }
17136   else
17137   {
17138     jam();
17139     /**
17140      * Any readCommitted scan, can get 626 if it finds a candidate record
17141      *   that is not visible to the scan (i.e uncommitted inserts)
17142      *   if scanning with locks (shared/exclusive) this is not visible
17143      *   to LQH as lock is taken earlier
17144      */
17145     ndbrequire(terrorCode == 626);
17146   }
17147 
17148   ndbrequire(scanptr.p->scanState == ScanRecord::WAIT_TUPKEY_COPY);
17149   if (tcConnectptr.p->errorCode != 0)
17150   {
17151     jam();
17152     closeCopyLab(signal, tcConnectptr.p);
17153     return;
17154   }
17155 
17156   if (scanptr.p->scanCompletedStatus == ZTRUE)
17157   {
17158     jam();
17159     closeCopyLab(signal, tcConnectptr.p);
17160     return;
17161   }
17162 
17163   ndbrequire(tcConnectptr.p->copyCountWords < cmaxWordsAtNodeRec);
17164   scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY;
17165   nextRecordCopy(signal, tcConnectptr);
17166 }
17167 
copyTupkeyConfLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)17168 void Dblqh::copyTupkeyConfLab(Signal* signal,
17169                               const TcConnectionrecPtr tcConnectptr)
17170 {
17171   ScanRecord* scanP = scanptr.p;
17172   Uint32 scan_direct_count = m_scan_direct_count;
17173   const TupKeyConf * const tupKeyConf = (TupKeyConf *)signal->getDataPtr();
17174 
17175   UintR readLength = tupKeyConf->readLength;
17176   Uint32 tableId = tcConnectptr.p->tableref;
17177   m_scan_direct_count = scan_direct_count + 1;
17178 
17179   if (scanP->readCommitted == 0)
17180   {
17181     jam();
17182     Uint32 accOpPtr= get_acc_ptr_from_scan_record(scanP, 0, false);
17183     ndbassert(accOpPtr != (Uint32)-1);
17184     c_acc->execACCKEY_ORD_no_ptr(signal, accOpPtr);
17185   }
17186 
17187   if (tcConnectptr.p->errorCode != 0) {
17188     jam();
17189     closeCopyLab(signal, tcConnectptr.p);
17190     return;
17191   }//if
17192   if (scanptr.p->scanCompletedStatus == ZTRUE) {
17193     jam();
17194 /*---------------------------------------------------------------------------*/
17195 /*   THE COPY PROCESS HAVE BEEN CLOSED. MOST LIKELY A NODE FAILURE.          */
17196 /*---------------------------------------------------------------------------*/
17197     closeCopyLab(signal, tcConnectptr.p);
17198     return;
17199   }//if
17200   TcConnectionrec * tcConP = tcConnectptr.p;
17201   tcConnectptr.p->totSendlenAi = readLength;
17202   tcConnectptr.p->connectState = TcConnectionrec::COPY_CONNECTED;
17203 
17204   /* Read primary keys from TUP into signal buffer space
17205    * (used to get here via scan keyinfo)
17206    */
17207   Uint32* tmp = signal->getDataPtrSend()+24;
17208   Uint32 len= tcConnectptr.p->primKeyLen = readPrimaryKeys(scanP, tcConP, tmp);
17209 
17210   tcConP->gci_hi = tmp[len];
17211   tcConP->gci_lo = 0;
17212   // Calculate hash (no need to linearise key)
17213   if (g_key_descriptor_pool.getPtr(tableId)->hasCharAttr)
17214   {
17215     tcConnectptr.p->hashValue = calculateHash(tableId, tmp);
17216   }
17217   else
17218   {
17219     tcConnectptr.p->hashValue = md5_hash((Uint64*)tmp, len);
17220   }
17221 
17222   // Copy keyinfo into long section for LQHKEYREQ below
17223   if (unlikely(!keyinfoLab(tmp, len, tcConnectptr)))
17224   {
17225     /* Failed to store keyInfo, fail copy
17226      * This will result in a COPY_FRAGREF being sent to
17227      * the starting node, which will cause it to fail
17228      */
17229     jamDebug();
17230     scanptr.p->scanErrorCounter++;
17231     tcConP->errorCode= ZGET_DATAREC_ERROR;
17232     scanptr.p->scanCompletedStatus= ZTRUE;
17233 
17234     closeCopyLab(signal, tcConnectptr.p);
17235     return;
17236   }
17237 
17238 /*---------------------------------------------------------------------------*/
17239 // To avoid using up to many operation records in ACC we will increase the
17240 // constant to ensure that we never send more than 40 records at a time.
17241 // This is where the constant 56 comes from. For long records this constant
17242 // will not matter that much. The current maximum is 6000 words outstanding
17243 // (including a number of those 56 words not really sent). We also have to
17244 // ensure that there are never more simultaneous usage of these operation
17245 // records to ensure that node recovery does not fail because of simultaneous
17246 // scanning.
17247 /*---------------------------------------------------------------------------*/
17248   UintR TnoOfWords = readLength + len;
17249   scanP->m_curr_batch_size_bytes += 4 * TnoOfWords;
17250   scanP->m_exec_direct_batch_size_words += readLength;
17251   TnoOfWords = TnoOfWords + MAGIC_CONSTANT;
17252   TnoOfWords = TnoOfWords + (TnoOfWords >> 2);
17253 
17254   /*-----------------------------------------------------------------
17255    * NOTE for transid1!
17256    * Transid1 in the tcConnection record is used load regulate the
17257    * copy(node recovery) process.
17258    * The number of outstanding words are written in the transid1
17259    * variable. This will be sent to the starting node in the
17260    * LQHKEYREQ signal and when the answer is returned in the LQHKEYCONF
17261    * we can reduce the number of outstanding words and check to see
17262    * if more LQHKEYREQ signals should be sent.
17263    *
17264    * However efficient this method is rather unsafe in such way that
17265    * it overwrites the transid1 original data.
17266    *
17267    * Also see TR 587.
17268    *----------------------------------------------------------------*/
17269   tcConnectptr.p->transid[0] = TnoOfWords; // Data overload, see note!
17270   ndbrequire(!c_copy_frag_live_node_halted);
17271   packLqhkeyreqLab(signal, tcConnectptr);
17272   tcConnectptr.p->copyCountWords += TnoOfWords;
17273   scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY;
17274   if (tcConnectptr.p->copyCountWords < cmaxWordsAtNodeRec)
17275   {
17276     nextRecordCopy(signal, tcConnectptr);
17277     return;
17278   }//if
17279   return;
17280 }//Dblqh::copyTupkeyConfLab()
17281 
17282 /*---------------------------------------------------------------------------*/
17283 /*     ENTER LQHKEYCONF                                                      */
17284 /*---------------------------------------------------------------------------*/
17285 /*   PRECONDITION: CONNECT_STATE = COPY_CONNECTED                            */
17286 /*---------------------------------------------------------------------------*/
copyCompletedLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)17287 void Dblqh::copyCompletedLab(Signal* signal,
17288                              const TcConnectionrecPtr tcConnectptr)
17289 {
17290   const LqhKeyConf * const lqhKeyConf = (LqhKeyConf *)signal->getDataPtr();
17291 
17292   ndbrequire(tcConnectptr.p->transid[1] == lqhKeyConf->transId2);
17293   if (tcConnectptr.p->copyCountWords >= cmaxWordsAtNodeRec) {
17294     tcConnectptr.p->copyCountWords -= lqhKeyConf->transId1; // Data overload, see note!
17295     if (scanptr.p->scanCompletedStatus == ZTRUE) {
17296       jam();
17297 /*---------------------------------------------------------------------------*/
17298 // Copy to complete, we will not start any new copying.
17299 /*---------------------------------------------------------------------------*/
17300       closeCopyLab(signal, tcConnectptr.p);
17301       return;
17302     }//if
17303     if (tcConnectptr.p->copyCountWords < cmaxWordsAtNodeRec)
17304     {
17305       jam();
17306       nextRecordCopy(signal, tcConnectptr);
17307       return;
17308     }//if
17309     return;
17310   }//if
17311   tcConnectptr.p->copyCountWords -= lqhKeyConf->transId1; // Data overload, see note!
17312   ndbrequire(tcConnectptr.p->copyCountWords <= cmaxWordsAtNodeRec);
17313   if (tcConnectptr.p->copyCountWords > 0) {
17314     jam();
17315     return;
17316   }//if
17317 /*---------------------------------------------------------------------------*/
17318 // No more outstanding copies. We will only start new ones from here if it was
17319 // stopped before and this only happens when copyCountWords is bigger than the
17320 // threshold value. Since this did not occur we must be waiting for completion.
17321 // Check that this is so. If not we crash to find out what is going on.
17322 /*---------------------------------------------------------------------------*/
17323 
17324   if (scanptr.p->scanCompletedStatus == ZTRUE) {
17325     jam();
17326     closeCopyLab(signal, tcConnectptr.p);
17327     return;
17328   }//if
17329 
17330   if (scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY &&
17331       scanptr.p->scanErrorCounter)
17332   {
17333     jam();
17334     closeCopyLab(signal, tcConnectptr.p);
17335     return;
17336   }
17337 
17338   if (c_copy_frag_live_node_performing_halt &&
17339       scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY)
17340   {
17341     jam();
17342     /* No more outstanding copy rows. We are only waiting now. */
17343     DEB_COPY(("(%u):2: Copy fragment process halted", instance()));
17344     scanptr.p->scanState = ScanRecord::COPY_FRAG_HALTED;
17345     scanptr.p->scan_lastSeen = __LINE__;
17346     c_copy_frag_live_node_halted = true;
17347     c_copy_frag_live_node_performing_halt = false;
17348     send_halt_copy_frag_conf(signal, false);
17349     return;
17350   }
17351 
17352   /**
17353    * We could come here even when c_copy_frag_live_node_performing_halt
17354    * is set. In this case scanState is WAIT_NEXT_SCAN_COPY which means
17355    * we are waiting for an outstanding NEXT_SCANREQ signal.
17356    */
17357   ndbassert(!c_copy_frag_live_node_performing_halt ||
17358             scanptr.p->scanState == ScanRecord::WAIT_NEXT_SCAN_COPY);
17359 
17360   if (scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY)
17361   {
17362     jam();
17363 /*---------------------------------------------------------------------------*/
17364 // Make sure that something is in progress. Otherwise we will simply stop
17365 // and nothing more will happen.
17366 /*---------------------------------------------------------------------------*/
17367     systemErrorLab(signal, __LINE__);
17368     return;
17369   }//if
17370   return;
17371 }//Dblqh::copyCompletedLab()
17372 
nextRecordCopy(Signal * signal,const TcConnectionrecPtr tcConnectptr)17373 void Dblqh::nextRecordCopy(Signal* signal,
17374                            const TcConnectionrecPtr tcConnectptr)
17375 {
17376   TcConnectionrec * const regTcPtr = tcConnectptr.p;
17377 
17378   fragptr.i = regTcPtr->fragmentptr;
17379   c_fragment_pool.getPtr(fragptr);
17380   scanptr.i = regTcPtr->tcScanRec;
17381   ndbrequire(c_scanRecordPool.getValidPtr(scanptr));
17382   ndbrequire(scanptr.p->scanState == ScanRecord::WAIT_LQHKEY_COPY);
17383 /*---------------------------------------------------------------------------*/
17384 // Make sure that nothing is in progress. Otherwise we will have to simultaneous
17385 // scans on the same record and this will certainly lead to unexpected
17386 // behaviour.
17387 /*---------------------------------------------------------------------------*/
17388   ScanRecord * const scanPtr = scanptr.p;
17389   ndbrequire(fragptr.p->fragStatus == Fragrecord::FSACTIVE);
17390 
17391   regTcPtr->errorCode = 0;
17392   if (c_copy_frag_live_node_performing_halt)
17393   {
17394     jam();
17395     ndbrequire(c_tc_connect_rec_copy_frag ==
17396                tcConnectptr.i);
17397 
17398     if (regTcPtr->copyCountWords == 0)
17399     {
17400       jam();
17401       /* No more outstanding copy rows. We are only waiting now. */
17402       DEB_COPY(("(%u):Copy fragment process halted", instance()));
17403       scanPtr->scanState = ScanRecord::COPY_FRAG_HALTED;
17404       scanPtr->scan_lastSeen = __LINE__;
17405       c_copy_frag_live_node_halted = true;
17406       c_copy_frag_live_node_performing_halt = false;
17407       send_halt_copy_frag_conf(signal, false);
17408     }
17409     return;
17410   }
17411   Uint32 acc_op_ptr= get_acc_ptr_from_scan_record(scanptr.p, 0, false);
17412   SimulatedBlock *block = scanPtr->scanBlock;
17413   ExecFunction f = scanPtr->scanFunction_NEXT_SCANREQ;
17414   const Uint32 sig0 = scanPtr->scanAccPtr;
17415   Uint32 in_send_next_scan = m_in_send_next_scan;
17416 
17417   /**
17418    * Here I can assign theData[1] through acc_op_ptr in the case of
17419    * acc_op_ptr != RNIL and also in the case of acc_op_ptr == RNIL
17420    * since theData[1] is assigned RNIL in the case when acc_op_ptr == RNIL
17421    */
17422   signal->theData[0] = sig0;
17423   signal->theData[1] = acc_op_ptr;
17424   signal->theData[2] = acc_op_ptr != RNIL ?
17425                        NextScanReq::ZSCAN_NEXT_COMMIT :
17426                        NextScanReq::ZSCAN_NEXT;
17427   /**
17428    * No need to commit (unlock) if no previous operation in ACC
17429    */
17430   scanPtr->scanState = ScanRecord::WAIT_NEXT_SCAN_COPY;
17431   if (unlikely(in_send_next_scan == 0))
17432   {
17433     send_next_NEXT_SCANREQ(signal,
17434                            block,
17435                            f,
17436                            scanPtr,
17437                            regTcPtr->clientConnectrec);
17438     return;
17439   }
17440   scanPtr->scan_lastSeen = __LINE__;
17441   ndbassert(in_send_next_scan == 1);
17442   m_in_send_next_scan = 2;
17443   /**
17444    * See explanation in scanNextLoopLab(...)
17445    */
17446 }//Dblqh::nextRecordCopy()
17447 
copyLqhKeyRefLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)17448 void Dblqh::copyLqhKeyRefLab(Signal* signal,
17449                              const TcConnectionrecPtr tcConnectptr)
17450 {
17451   jamDebug();
17452   ndbrequire(tcConnectptr.p->transid[1] == signal->theData[4]);
17453   Uint32 copyWords = signal->theData[3];
17454   scanptr.p->scanErrorCounter++;
17455   tcConnectptr.p->errorCode = terrorCode;
17456 
17457   LqhKeyConf* conf = (LqhKeyConf*)signal->getDataPtrSend();
17458   conf->transId1 = copyWords;
17459   conf->transId2 = tcConnectptr.p->transid[1];
17460   copyCompletedLab(signal, tcConnectptr);
17461 }//Dblqh::copyLqhKeyRefLab()
17462 
closeCopyLab(Signal * signal,TcConnectionrec * regTcPtr)17463 void Dblqh::closeCopyLab(Signal* signal,
17464                          TcConnectionrec* regTcPtr)
17465 {
17466   ScanRecord * const scanPtr = scanptr.p;
17467 
17468   if (regTcPtr->copyCountWords > 0) {
17469 /*---------------------------------------------------------------------------*/
17470 // We are still waiting for responses from the starting node.
17471 // Wait until all of those have arrived until we start the
17472 // close process.
17473 /*---------------------------------------------------------------------------*/
17474     jam();
17475     scanPtr->scanState = ScanRecord::WAIT_LQHKEY_COPY;
17476     scanPtr->scan_lastSeen = __LINE__;
17477     return;
17478   }//if
17479   fragptr.i = regTcPtr->fragmentptr;
17480   regTcPtr->transid[0] = 0;
17481   regTcPtr->transid[1] = 0;
17482   c_fragment_pool.getPtr(fragptr);
17483 
17484   /**
17485    * Stop sending ROWID for all operations from now on
17486    */
17487   fragptr.p->m_copy_started_state = Fragrecord::AC_NORMAL;
17488   if (ERROR_INSERTED(5714))
17489   {
17490     ndbout_c("Copy of tab(%u,%u) complete",
17491              fragptr.p->tabRef, fragptr.p->fragId);
17492   }
17493 
17494   Fragrecord::FragStatus fragstatus = fragptr.p->fragStatus;
17495 
17496   const Uint32 sig0 = scanPtr->scanAccPtr;
17497   SimulatedBlock *block = scanPtr->scanBlock;
17498   ExecFunction f = scanPtr->scanFunction_NEXT_SCANREQ;
17499 
17500   scanPtr->scanState = ScanRecord::WAIT_CLOSE_COPY;
17501   scanPtr->scan_lastSeen = __LINE__;
17502   scanPtr->scan_check_lcp_stop = 0;
17503   signal->theData[0] = sig0;
17504   signal->theData[1] = RNIL;
17505   signal->theData[2] = NextScanReq::ZSCAN_CLOSE;
17506   ndbrequire(fragstatus == Fragrecord::FSACTIVE);
17507   scanPtr->scanAccPtr = RNIL;
17508   block->EXECUTE_DIRECT_FN(f, signal);
17509 }//Dblqh::closeCopyLab()
17510 
17511 /*---------------------------------------------------------------------------*/
17512 /*   ENTER NEXT_SCANCONF WITH                                                */
17513 /*     SCANPTR,                                                              */
17514 /*     TFRAGID,                                                              */
17515 /*     TACC_OPPTR,                                                           */
17516 /*     TLOCAL_KEY1,                                                          */
17517 /*     TLOCAL_KEY2,                                                          */
17518 /*     TKEY_LENGTH,                                                          */
17519 /*     TKEY1,                                                                */
17520 /*     TKEY2,                                                                */
17521 /*     TKEY3,                                                                */
17522 /*     TKEY4                                                                 */
17523 /*---------------------------------------------------------------------------*/
17524 /*   PRECONDITION: SCAN_STATE = WAIT_CLOSE_COPY                              */
17525 /*---------------------------------------------------------------------------*/
accCopyCloseConfLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)17526 void Dblqh::accCopyCloseConfLab(Signal* signal,
17527                                 const TcConnectionrecPtr tcConnectptr)
17528 {
17529   ScanRecord * const scanPtr = scanptr.p;
17530   TcConnectionrec * const regTcPtr = tcConnectptr.p;
17531   const Uint32 sig0 = regTcPtr->tupConnectrec;
17532   const Uint32 sig1 = regTcPtr->tableref;
17533   const Uint32 sig2 = scanPtr->scanSchemaVersion;
17534   const Uint32 sig4 = scanPtr->scanStoredProcId;
17535   const Uint32 sig5 = scanPtr->scanApiBlockref;
17536 
17537   signal->theData[0] = sig0;
17538   signal->theData[1] = sig1;
17539   signal->theData[2] = sig2;
17540   signal->theData[3] = ZDELETE_STORED_PROC_ID;
17541   signal->theData[4] = sig4;
17542   signal->theData[5] = sig5;
17543   c_tup->execSTORED_PROCREQ(signal);
17544   jamEntryDebug();
17545   scanPtr->scanStoredProcId = RNIL;
17546   tupCopyCloseConfLab(signal, tcConnectptr);
17547   return;
17548 }//Dblqh::accCopyCloseConfLab()
17549 
17550 /*---------------------------------------------------------------------------*/
17551 /*   ENTER STORED_PROCCONF WITH                                              */
17552 /*     0 success = CONF, 1 failure == REF                                    */
17553 /*     STORED_PROC_ID                                                        */
17554 /*---------------------------------------------------------------------------*/
tupCopyCloseConfLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)17555 void Dblqh::tupCopyCloseConfLab(Signal* signal,
17556                                 const TcConnectionrecPtr tcConnectptr)
17557 {
17558   c_tc_connect_rec_copy_frag = RNIL;
17559 
17560   if (tcConnectptr.p->abortState == TcConnectionrec::NEW_FROM_TC)
17561   {
17562     jam();
17563     TcNodeFailRecordPtr tcNodeFailPtr;
17564     tcNodeFailPtr.i = tcConnectptr.p->tcNodeFailrec;
17565     ptrCheckGuard(tcNodeFailPtr, ctcNodeFailrecFileSize, tcNodeFailRecord);
17566     tcNodeFailPtr.p->tcRecNow = tcConnectptr.i + 1;
17567     signal->theData[0] = ZLQH_TRANS_NEXT;
17568     signal->theData[1] = tcNodeFailPtr.i;
17569     sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
17570 
17571     CopyFragRef * const ref = (CopyFragRef *)&signal->theData[0];
17572     ref->userPtr = scanptr.p->copyPtr;
17573     ref->sendingNodeId = cownNodeid;
17574     ref->startingNodeId = scanptr.p->scanNodeId;
17575     ref->tableId = fragptr.p->tabRef;
17576     ref->fragId = fragptr.p->fragId;
17577     ref->errorCode = ZNODE_FAILURE_ERROR;
17578     sendSignal(tcConnectptr.p->clientBlockref, GSN_COPY_FRAGREF, signal,
17579                CopyFragRef::SignalLength, JBB);
17580   }
17581   else
17582   {
17583     if (c_copy_frag_live_node_performing_halt)
17584     {
17585       jam();
17586       send_halt_copy_frag_conf(signal, true);
17587       c_copy_frag_live_node_performing_halt = false;
17588     }
17589     ndbrequire(!c_copy_frag_live_node_halted);
17590 
17591     if (scanptr.p->scanErrorCounter > 0)
17592     {
17593       jam();
17594       CopyFragRef * const ref = (CopyFragRef *)&signal->theData[0];
17595       ref->userPtr = scanptr.p->copyPtr;
17596       ref->sendingNodeId = cownNodeid;
17597       ref->startingNodeId = scanptr.p->scanNodeId;
17598       ref->tableId = fragptr.p->tabRef;
17599       ref->fragId = fragptr.p->fragId;
17600       ref->errorCode = tcConnectptr.p->errorCode;
17601       sendSignal(tcConnectptr.p->clientBlockref, GSN_COPY_FRAGREF, signal,
17602                  CopyFragRef::SignalLength, JBB);
17603     }
17604     else
17605     {
17606       jam();
17607       DEB_COPY(("(%u)COPY_FRAGCONF tab(%u,%u)",
17608                 instance(),
17609                 tcConnectptr.p->tableref,
17610                 tcConnectptr.p->fragmentid));
17611       CopyFragConf * const conf = (CopyFragConf *)&signal->theData[0];
17612       conf->userPtr = scanptr.p->copyPtr;
17613       conf->sendingNodeId = cownNodeid;
17614       conf->startingNodeId = scanptr.p->scanNodeId;
17615       conf->tableId = tcConnectptr.p->tableref;
17616       conf->fragId = tcConnectptr.p->fragmentid;
17617       conf->rows_lo = scanptr.p->m_curr_batch_size_rows;
17618       conf->bytes_lo = scanptr.p->m_curr_batch_size_bytes;
17619       sendSignal(tcConnectptr.p->clientBlockref, GSN_COPY_FRAGCONF, signal,
17620 		 CopyFragConf::SignalLength, JBB);
17621     }//if
17622   }//if
17623   releaseActiveCopy(signal);
17624   handle_finish_scan(signal, tcConnectptr);
17625 }//Dblqh::tupCopyCloseConfLab()
17626 
17627 /*---------------------------------------------------------------------------*/
17628 /*   A NODE FAILURE OCCURRED DURING THE COPY PROCESS. WE NEED TO CLOSE THE   */
17629 /*   COPY PROCESS SINCE A NODE FAILURE DURING THE COPY PROCESS WILL ALSO     */
17630 /*   FAIL THE NODE THAT IS TRYING TO START-UP.                               */
17631 /*---------------------------------------------------------------------------*/
closeCopyRequestLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)17632 void Dblqh::closeCopyRequestLab(Signal* signal,
17633                                 const TcConnectionrecPtr tcConnectptr)
17634 {
17635   setup_scan_pointers_from_tc_con(tcConnectptr);
17636   scanptr.p->scanErrorCounter++;
17637   if (0) ndbout_c("closeCopyRequestLab: scanState: %d", scanptr.p->scanState);
17638   switch (scanptr.p->scanState) {
17639   case ScanRecord::WAIT_TUPKEY_COPY:
17640   case ScanRecord::WAIT_NEXT_SCAN_COPY:
17641     jam();
17642 /*---------------------------------------------------------------------------*/
17643 /*   SET COMPLETION STATUS AND WAIT FOR OPPORTUNITY TO STOP THE SCAN.        */
17644 //   ALSO SET NO OF WORDS OUTSTANDING TO ZERO TO AVOID ETERNAL WAIT.
17645 /*---------------------------------------------------------------------------*/
17646     scanptr.p->scanCompletedStatus = ZTRUE;
17647     tcConnectptr.p->copyCountWords = 0;
17648     break;
17649   case ScanRecord::WAIT_ACC_COPY:
17650     jam();
17651 /*---------------------------------------------------------------------------*/
17652 /*   WE ARE CURRENTLY STARTING UP THE SCAN. SET COMPLETED STATUS AND WAIT FOR*/
17653 /*   COMPLETION OF STARTUP.                                                  */
17654 /*---------------------------------------------------------------------------*/
17655     scanptr.p->scanCompletedStatus = ZTRUE;
17656     break;
17657   case ScanRecord::WAIT_CLOSE_COPY:
17658     jam();
17659 /*---------------------------------------------------------------------------*/
17660 /*   CLOSE IS ALREADY ONGOING. WE NEED NOT DO ANYTHING.                      */
17661 /*---------------------------------------------------------------------------*/
17662     break;
17663   case ScanRecord::COPY_FRAG_HALTED:
17664     jam();
17665     c_copy_frag_live_node_halted = false;
17666     /* Fall through */
17667   case ScanRecord::WAIT_LQHKEY_COPY:
17668     jam();
17669 /*---------------------------------------------------------------------------*/
17670 /*   WE ARE WAITING FOR THE FAILED NODE. THE NODE WILL NEVER COME BACK.      */
17671 //   WE NEED TO START THE FAILURE HANDLING IMMEDIATELY.
17672 //   ALSO SET NO OF WORDS OUTSTANDING TO ZERO TO AVOID ETERNAL WAIT.
17673 /*---------------------------------------------------------------------------*/
17674     tcConnectptr.p->copyCountWords = 0;
17675     closeCopyLab(signal, tcConnectptr.p);
17676     break;
17677   default:
17678     ndbabort();
17679   }//switch
17680   return;
17681 }//Dblqh::closeCopyRequestLab()
17682 
17683 /* ****************************************************** */
17684 /*  COPY_ACTIVEREQ: Change state of a fragment to ACTIVE. */
17685 /* ****************************************************** */
execCOPY_ACTIVEREQ(Signal * signal)17686 void Dblqh::execCOPY_ACTIVEREQ(Signal* signal)
17687 {
17688   /**
17689    * We come here two times for normal stored tables.
17690    * We also come here two times for ordered index tables which
17691    * obviously never need to much in this context.
17692    *
17693    * For NOLOGGING tables we come here one time with flags set,
17694    * the second time to activate REDO logging we obviously need
17695    * to skip since it isn't needed to activate the REDO logging.
17696    *
17697    * We can discover that the table is an ordered index by checking
17698    * isOrderedIndex on tableType on table object.
17699    * We can discover that a table is a temporary or NOLOGGING table
17700    * by looking at the lcpFlag on the fragment.
17701    *
17702    * Thus we need to skip the LCP handling for all ordered indexes
17703    * and for temporary and NOLOGGING tables in all signals.
17704    */
17705   CRASH_INSERTION(5026);
17706 
17707   const CopyActiveReq * const req = (CopyActiveReq *)&signal->theData[0];
17708   jamEntry();
17709   Uint32 masterPtr = req->userPtr;
17710   BlockReference masterRef = req->userRef;
17711   tabptr.i = req->tableId;
17712   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
17713   Uint32 fragId = req->fragId;
17714   Uint32 flags = req->flags;
17715   if (unlikely(signal->getLength() < CopyActiveReq::SignalLength))
17716   {
17717     jam();
17718     ndbabort(); /* Don't support upgrade from 7.0 */
17719     flags = 0;
17720   }
17721 
17722   ndbrequire(getFragmentrec(signal, fragId));
17723 
17724   fragptr.p->fragStatus = Fragrecord::FSACTIVE;
17725   /* Ensure we don't send Rowid's to any nodes we're not copying to */
17726   fragptr.p->m_copy_started_state = Fragrecord::AC_NORMAL;
17727   fragptr.p->fragDistributionKey = req->distributionKey;
17728   fragptr.p->m_copy_complete_flag = 1;
17729 
17730   if (TRACENR_FLAG)
17731     TRACENR("tab: " << tabptr.i
17732 	    << " frag: " << fragId
17733 	    << " COPY ACTIVE"
17734             << " flags: " << hex << flags << endl);
17735 
17736   ndbrequire(cnoActiveCopy < 3);
17737   cactiveCopy[cnoActiveCopy] = fragptr.i;
17738   cnoActiveCopy++;
17739   fragptr.p->masterBlockref = masterRef;
17740   fragptr.p->masterPtr = masterPtr;
17741 
17742   if (flags)
17743   {
17744     /**
17745      * We send with flags first that indicates no logging
17746      * and no wait, we then send without flags to activate
17747      * REDO logging. We thus use the flags to indicate when
17748      * a new fragment is to be copied.
17749      */
17750     jam();
17751     log_fragment_copied(signal);
17752   }
17753   else
17754   {
17755     jam();
17756     DEB_COPY(("(%u)Activate REDO log of tab(%u,%u)",
17757               instance(),
17758               tabptr.i,
17759               fragId));
17760     CRASH_INSERTION(5091);
17761     /**
17762      * At first COPY_ACTIVEREQ to activate REDO log on any
17763      * fragment means that the copy fragment process is
17764      * completed and we can cease to worry about halt and
17765      * resume of copy fragment process.
17766      *
17767      * We can reach this state if we attempted to halt the
17768      * last fragment to copy and we failed to halt it before
17769      * it was completed. This can happen e.g. if we waited
17770      * for the first LQHKEYREQ.
17771      *
17772      * It can also happen if we sent HALT_COPY_FRAG_REQ,
17773      * in this case we might fail to halt the process and
17774      * the response signal HALT_COPY_FRAG_CONF is raced
17775      * by the COPY_FRAGCONF and COPY_ACTIVEREQ signals that
17776      * are sent through a different path. So this path is
17777      * more uncommon.
17778      */
17779     if (!c_copy_frag_halted &&
17780         c_copy_frag_halt_state == COPY_FRAG_HALT_WAIT_FIRST_LQHKEYREQ)
17781     {
17782       jam();
17783       DEB_LCP(("(%u)Phase 2 of copy fragment started while waiting for "
17784                "LQHKEYREQ",
17785                instance()));
17786       c_copy_frag_halt_state = COPY_FRAG_HALT_STATE_IDLE;
17787     }
17788     if (!c_copy_frag_halted &&
17789         c_copy_frag_halt_process_locked &&
17790         c_copy_frag_halt_state == WAIT_HALT_COPY_FRAG_CONF)
17791     {
17792       jam();
17793       DEB_LCP(("(%u)Phase 2 of copy fragment started while waiting for halt",
17794                instance()));
17795       c_copy_frag_halt_process_locked = false;
17796       c_copy_frag_halt_state = COPY_FRAG_HALT_STATE_IDLE;
17797     }
17798     if (c_copy_frag_halted &&
17799         c_copy_frag_halt_process_locked &&
17800         c_copy_frag_halt_state == WAIT_RESUME_COPY_FRAG_CONF)
17801     {
17802       jam();
17803       DEB_LCP(("(%u)Phase 2 of copy fragment started while resuming",
17804                instance()));
17805       c_copy_frag_halted = false;
17806       c_copy_frag_halt_process_locked = false;
17807       c_copy_frag_halt_state = COPY_FRAG_HALT_STATE_IDLE;
17808     }
17809     ndbrequire(!c_copy_frag_halted &&
17810                c_copy_frag_halt_state == COPY_FRAG_HALT_STATE_IDLE);
17811     if (c_copy_fragment_in_progress)
17812     {
17813       jam();
17814       m_second_activate_fragment_ptr_i = fragptr.i;
17815       sendSignal(NDBCNTR_REF, GSN_COPY_FRAG_NOT_IN_PROGRESS_REP, signal, 1, JBB);
17816       return;
17817     }
17818   }
17819 
17820   /**
17821    * 1st phase (CAR_NO_LOGGING & CAR_NO_WAIT)
17822    * ---------
17823    * Put fragment into Local LCP queue and start executing them
17824    * immediately. We respond without waiting for this activity
17825    * to complete.
17826    *
17827    * 2nd phase (No flags)
17828    * --------------------
17829    * At first COPY_ACTIVEREQ in this 2nd phase
17830    * -->
17831    *
17832    *    We will only receive one COPY_ACTIVEREQ at a time per LDM from DIH.
17833    *    However many LDMs can receive them in parallel although not
17834    *    necessarily all of the LDMs will receive them in parallel.
17835    *
17836    *    So when we receive the first COPY_ACTIVEREQ in this instance
17837    *    then we will send WAIT_ALL_COMPLETE_LCP_REQ to the LQH
17838    *    proxy. After this the LQH proxy will send WAIT_COMPLETE_LCP_REQ
17839    *    to all LDMs to ask them to wait for completion of the local
17840    *    LCP.
17841    *
17842    *    When the local LCP is completed the LDM will send
17843    *    WAIT_COMPLETE_LCP_CONF to the LQH proxy. Then the LQH proxy will
17844    *    send WAIT_ALL_COMPLETE_LCP_CONF to all LDMs. After this it is
17845    *    ok to respond to the COPY_ACTIVEREQ possibly waiting and then
17846    *    the COPY_ACTIVEREQ will continue as usual.
17847    *
17848    *    Record reception of the activate redo log. For tables that
17849    *    are ordered indexes or NOLOGGING tables or temporary tables
17850    *    we will respond immediately.
17851    *
17852    *    When we are done with the first LCP then we will send
17853    *    WAIT_COMPLETE_LCP_CONF to the LQH proxy. When all LDMs have
17854    *    completed this first local LCP then the LQH proxy will
17855    *    send WAIT_ALL_COMPLETE_LCP_CONF to all LDMs.
17856    *
17857    *    Then one could check if it makes sense to run even a 2nd
17858    *    Local LCP before proceeding. This as left as future work.
17859    *
17860    * After this we proceed with restart exactly as before.
17861    *
17862    * An easy way to interact with the LCP processing is to
17863    * simply send a LCP_FRAG_ORD to ourselves. The first
17864    * one will have a firstFragmentFlag. When we receive
17865    * the first COPY_ACTIVEREQ in the second phase we will
17866    * send the LCP_FRAG_ORD with the lastFragmentFlag set.
17867    *
17868    * We only need some local variable indicating that
17869    * we are running Local LCP to ensure that we don't
17870    * send any LCP_FRAG_REP and that we avoid any
17871    * other sends out of the node.
17872    *
17873    * If we want a fragment to be re-executed in the same
17874    * LCP we simply send a new LCP_FRAG_ORD after it has
17875    * completed. A natural place to check this is in the
17876    * completion after one fragment LCP where we can
17877    * issue a new LCP if there are no queued fragments
17878    * for LCP.
17879    *
17880    * If we decide on a second LCP then we simply enter
17881    * all fragments into the queue and wait for it to
17882    * complete.
17883    */
17884   if (flags & CopyActiveReq::CAR_NO_WAIT)
17885   {
17886     jam();
17887     ndbrequire(flags & CopyActiveReq::CAR_NO_LOGGING);
17888     ndbrequire(fragptr.p->activeTcCounter == 0);
17889     if (!DictTabInfo::isOrderedIndex(tabptr.p->tableType) &&
17890         fragptr.p->lcpFlag == Fragrecord::LCP_STATE_TRUE &&
17891         (c_backup->is_partial_lcp_enabled()) &&
17892         !c_full_local_lcp_started)
17893     {
17894       jam();
17895       if (handle_lcp_fragment_first_phase(signal))
17896       {
17897         jam();
17898         return;
17899       }
17900     }
17901     Uint32 save = fragptr.p->startGci;
17902     fragptr.p->startGci = 0;
17903     sendCopyActiveConf(signal, tabptr.i);
17904     fragptr.p->startGci = save;
17905     return;
17906   }
17907   ndbrequire((flags & CopyActiveReq::CAR_NO_WAIT) == 0 &&
17908              (flags & CopyActiveReq::CAR_NO_LOGGING) == 0);
17909   ndbrequire(!c_copy_fragment_in_progress);
17910   if (c_local_lcp_started)
17911   {
17912     jam();
17913     /**
17914      * Copy fragment no longer in progress, we are still
17915      * waiting for local LCP started to complete before
17916      * we can proceed to next step.
17917      */
17918     m_second_activate_fragment_ptr_i = fragptr.i;
17919     return;
17920   }
17921   ndbrequire(m_node_restart_first_local_lcp_started ||
17922              !c_backup->is_partial_lcp_enabled());
17923 
17924   activate_redo_log(signal, tabptr.i, fragId);
17925 }//Dblqh::execCOPY_ACTIVEREQ()
17926 
execCOPY_FRAG_NOT_IN_PROGRESS_REP(Signal * signal)17927 void Dblqh::execCOPY_FRAG_NOT_IN_PROGRESS_REP(Signal *signal)
17928 {
17929   jamEntry();
17930   ndbrequire(c_copy_fragment_in_progress ||
17931              c_num_fragments_created_since_restart == 0);
17932   c_copy_fragment_in_progress = false;
17933   if (c_num_fragments_created_since_restart == 0)
17934   {
17935     jam();
17936     /* No need to do anything here. */
17937     return;
17938   }
17939   /**
17940    * We are now sure that no more local LCPs can be started,
17941    * we still need to wait until the current one (if a current
17942    * one is running) is completed before we proceed with
17943    * activation of the REDO logs).
17944    */
17945   if (!c_local_lcp_started)
17946   {
17947     jam();
17948     /* No local LCP ongoing, ready to proceed */
17949     if (m_second_activate_fragment_ptr_i == RNIL)
17950     {
17951       jam();
17952       /**
17953        * We haven't received the first COPY_ACTIVEREQ in
17954        * activate REDO log phase yet. We act when this signal
17955        * is received, no need to do anything now.
17956        */
17957       return;
17958     }
17959     fragptr.i = m_second_activate_fragment_ptr_i;
17960     m_second_activate_fragment_ptr_i = RNIL;
17961     c_fragment_pool.getPtr(fragptr);
17962     activate_redo_log(signal, fragptr.p->tabRef, fragptr.p->fragId);
17963     return;
17964   }
17965   DEB_LOCAL_LCP(("(%u)Started second phase of Copy fragment, wait all LCP",
17966                  instance()));
17967   /**
17968    * Local LCP is ongoing, if a full local LCP is ongoing
17969    * we need not do anything since NDBCNTR will either have
17970    * already received WAIT_ALL_COMPLETE_LCP_REQ or will
17971    * receive it when we are ready to send it.
17972    *
17973    * If a local LCP is ongoing, but not a full local LCP
17974    * then we need to send WAIT_ALL_COMPLETE_LCP_REQ from
17975    * here to indicate that we are ready for the complete
17976    * local LCP phase.
17977    */
17978   if (!c_full_local_lcp_started)
17979   {
17980     jam();
17981     signal->theData[0] = reference();
17982     c_local_lcp_sent_wait_all_complete_lcp_req = true;
17983     sendSignal(NDBCNTR_REF, GSN_WAIT_ALL_COMPLETE_LCP_REQ, signal, 1, JBB);
17984   }
17985   ndbrequire(c_localLcpId != 0 ||
17986              c_local_lcp_sent_wait_complete_conf);
17987 }
17988 
activate_redo_log(Signal * signal,Uint32 tabPtrI,Uint32 fragId)17989 void Dblqh::activate_redo_log(Signal *signal,
17990                               Uint32 tabPtrI,
17991                               Uint32 fragId)
17992 {
17993   tabptr.i = tabPtrI;
17994   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
17995   if (DictTabInfo::isOrderedIndex(tabptr.p->tableType) ||
17996       fragptr.p->lcpFlag != Fragrecord::LCP_STATE_TRUE)
17997   {
17998     jam();
17999     sendCopyActiveConf(signal, tabptr.i);
18000     return;
18001   }
18002   if (fragptr.p->lcpFlag == Fragrecord::LCP_STATE_TRUE)
18003   {
18004     jam();
18005     fragptr.p->logFlag = Fragrecord::STATE_TRUE;
18006   }
18007 
18008   fragptr.p->activeTcCounter = 1;
18009 /*------------------------------------------------------*/
18010 /*       SET IT TO ONE TO ENSURE THAT IT IS NOT POSSIBLE*/
18011 /*       TO DECREASE IT TO ZERO UNTIL WE HAVE COMPLETED */
18012 /*       THE SCAN.                                      */
18013 /*------------------------------------------------------*/
18014   signal->theData[0] = ZSCAN_TC_CONNECT;
18015   signal->theData[1] = 0;
18016   signal->theData[2] = tabptr.i;
18017   signal->theData[3] = fragId;
18018   sendSignal(cownref, GSN_CONTINUEB, signal, 4, JBB);
18019 }
18020 
handle_lcp_fragment_first_phase(Signal * signal)18021 bool Dblqh::handle_lcp_fragment_first_phase(Signal *signal)
18022 {
18023   if (!m_node_restart_first_local_lcp_started)
18024   {
18025     jam();
18026     c_saveLcpId = c_lcpId;
18027     DEB_LCP(("(%u)c_lcpId = %u", instance(), c_lcpId));
18028     /**
18029      * Set first fragment flag in LCP_FRAG_ORD by
18030      * that c_lcpId != RNIL from start, so this will
18031      * set firstFragmentFlag to true.
18032      */
18033     m_first_activate_fragment_ptr_i = fragptr.i;
18034     signal->theData[0] = 0; /* Indicate normal local LCP started */
18035     sendSignal(NDBCNTR_REF, GSN_START_LOCAL_LCP_ORD, signal,
18036                1, JBB);
18037     return true;
18038   }
18039   if (fragptr.p->m_local_lcp_instance_started !=
18040       c_current_local_lcp_instance)
18041   {
18042     jam();
18043     sendLCP_FRAG_ORD(signal, fragptr.i);
18044   }
18045   return false;
18046 }
18047 
start_local_lcp(Signal * signal,Uint32 lcpId,Uint32 localLcpId)18048 void Dblqh::start_local_lcp(Signal *signal,
18049                             Uint32 lcpId,
18050                             Uint32 localLcpId)
18051 {
18052   if (!m_node_restart_first_local_lcp_started)
18053   {
18054     jam();
18055     /**
18056      * No more necessary to stop and wait for NDBCNTR to start the very
18057      * first local LCP before we proceed.
18058      */
18059     m_node_restart_first_local_lcp_started = true;
18060     c_saveLcpId = c_lcpId;
18061   }
18062 
18063   /**
18064    * The (lcpId, localLcpId) tuple must be higher than last start.
18065    * If equal than a local LCP must be ongoing already and we are
18066    * now starting a full local LCP.
18067    */
18068   ndbrequire(lcpId > m_curr_lcp_id ||
18069              (lcpId == m_curr_lcp_id &&
18070               localLcpId > m_curr_local_lcp_id) ||
18071              (lcpId == m_curr_lcp_id &&
18072               localLcpId == m_curr_local_lcp_id &&
18073               c_full_local_lcp_started &&
18074               c_local_lcp_started));
18075 
18076   m_curr_lcp_id = lcpId;
18077   m_curr_local_lcp_id = localLcpId;
18078   c_localLcpId = 1;
18079   if (!c_local_lcp_started)
18080   {
18081     jam();
18082     /**
18083      * First fragment of new local LCP started.
18084      * Toggle c_current_local_lcp_instance.
18085      */
18086     c_local_lcp_started = true;
18087     c_max_keep_gci_in_lcp =
18088       (crestartNewestGci == 0 ||
18089        crestartNewestGci == ZUNDEFINED_GCI_LIMIT) ?
18090                             2 : crestartNewestGci;
18091     c_keep_gci_for_lcp = c_max_keep_gci_in_lcp;
18092     c_first_set_min_keep_gci = true;
18093     c_current_local_lcp_instance++;
18094     c_current_local_lcp_instance &= 1;
18095     DEB_LOCAL_LCP(("(%u)c_current_local_lcp_instance: %u",
18096                    instance(),
18097                    c_current_local_lcp_instance));
18098   }
18099   if (m_first_activate_fragment_ptr_i != RNIL)
18100   {
18101     jam();
18102     fragptr.i = m_first_activate_fragment_ptr_i;
18103     m_first_activate_fragment_ptr_i = RNIL;
18104     c_fragment_pool.getPtr(fragptr);
18105     Uint32 save = fragptr.p->startGci;
18106     fragptr.p->startGci = 0;
18107     sendCopyActiveConf(signal, fragptr.p->tabRef);
18108     fragptr.p->startGci = save;
18109     DEB_LOCAL_LCP(("(%u)Written START LCP to sysfile for normal path",
18110                    instance()));
18111     if (!c_full_local_lcp_started &&
18112         (fragptr.p->m_local_lcp_instance_started !=
18113          c_current_local_lcp_instance))
18114     {
18115       jam();
18116       sendLCP_FRAG_ORD(signal, fragptr.i);
18117     }
18118   }
18119 }
18120 
18121 void
execSTART_LOCAL_LCP_ORD(Signal * signal)18122 Dblqh::execSTART_LOCAL_LCP_ORD(Signal *signal)
18123 {
18124   if (c_num_fragments_created_since_restart == 0)
18125   {
18126     jam();
18127     c_local_lcp_sent_wait_all_complete_lcp_req = true;
18128     sendSignal(NDBCNTR_REF, GSN_WAIT_ALL_COMPLETE_LCP_REQ, signal, 1, JBB);
18129     return;
18130   }
18131   Uint32 lcpId = signal->theData[0];
18132   Uint32 localLcpId = signal->theData[1];
18133   start_local_lcp(signal, lcpId, localLcpId);
18134 }
18135 
execSTART_FULL_LOCAL_LCP_ORD(Signal * signal)18136 void Dblqh::execSTART_FULL_LOCAL_LCP_ORD(Signal *signal)
18137 {
18138   Uint32 lcpId = signal->theData[0];
18139   Uint32 localLcpId = signal->theData[1];
18140 
18141   c_full_local_lcp_started = true;
18142   if ((c_local_lcp_started &&
18143        c_localLcpId == 0) ||
18144        c_num_fragments_created_since_restart == 0)
18145   {
18146     /**
18147      * We have started a local LCP already. If we haven't
18148      * already sent WAIT_ALL_COMPLETE_LCP_REQ we will send
18149      * it now, if this is sent but not WAIT_COMPLETE_LCP_CONF
18150      * then we will send that signal.
18151      */
18152     if (c_local_lcp_sent_wait_all_complete_lcp_req)
18153     {
18154       jam();
18155       if (!c_local_lcp_sent_wait_complete_conf)
18156       {
18157         c_local_lcp_sent_wait_complete_conf = true;
18158         WaitCompleteLcpConf *conf =
18159           (WaitCompleteLcpConf*)signal->getDataPtrSend();
18160         conf->senderRef = reference();
18161         conf->lcpId = lcpId;
18162         conf->localLcpId = localLcpId;
18163         conf->maxGciInLcp = c_max_gci_in_lcp;
18164         conf->maxKeepGci = c_max_keep_gci_in_lcp;
18165         sendSignal(NDBCNTR_REF, GSN_WAIT_COMPLETE_LCP_CONF, signal,
18166                    WaitCompleteLcpConf::SignalLength, JBB);
18167       }
18168       return;
18169     }
18170     else
18171     {
18172       jam();
18173       c_local_lcp_sent_wait_all_complete_lcp_req = true;
18174       sendSignal(NDBCNTR_REF, GSN_WAIT_ALL_COMPLETE_LCP_REQ, signal, 1, JBB);
18175     }
18176     return;
18177   }
18178   start_local_lcp(signal, lcpId, localLcpId);
18179   DEB_LOCAL_LCP(("(%u): start_full_local_lcp", instance()));
18180   c_current_local_lcp_table_id = 0;
18181   start_lcp_on_table(signal);
18182 }
18183 
18184 void
sendLCP_FRAG_ORD(Signal * signal,Uint32 fragPtrI)18185 Dblqh::sendLCP_FRAG_ORD(Signal *signal, Uint32 fragPtrI)
18186 {
18187   /* Send LCP_FRAG_ORD for the fragment. */
18188   LcpFragOrd *lcpFragOrd = (LcpFragOrd *)signal->getDataPtrSend();
18189   fragptr.i = fragPtrI;
18190   c_fragment_pool.getPtr(fragptr);
18191   fragptr.p->m_local_lcp_instance_started++;
18192   fragptr.p->m_local_lcp_instance_started &= 1;
18193   ndbrequire(c_current_local_lcp_instance ==
18194              fragptr.p->m_local_lcp_instance_started);
18195 
18196   lcpFragOrd->tableId = fragptr.p->tabRef;
18197   lcpFragOrd->fragmentId = fragptr.p->fragId;
18198   lcpFragOrd->lcpNo = 0;
18199   lcpFragOrd->lcpId = m_curr_lcp_id;
18200   lcpFragOrd->lastFragmentFlag = false;
18201   lcpFragOrd->keepGci = 0;
18202   sendSignal(reference(), GSN_LCP_FRAG_ORD, signal,
18203              LcpFragOrd::SignalLength, JBB);
18204 }
18205 
complete_local_lcp(Signal * signal)18206 void Dblqh::complete_local_lcp(Signal *signal)
18207 {
18208   /**
18209    * We have completed our local LCP, we still need to wait for the
18210    * rest of the LDMs to finish their local LCP.
18211    */
18212   DEB_LCP(("(%u)Completed local LCP", instance()));
18213   c_localLcpId = 0;
18214   c_lcpId = c_saveLcpId;
18215   DEB_LCP(("(%u)Restored c_lcpId = %u", instance(), c_lcpId));
18216 
18217   if (c_full_local_lcp_started)
18218   {
18219     jam();
18220     if (!c_local_lcp_sent_wait_all_complete_lcp_req)
18221     {
18222       jam();
18223       signal->theData[0] = reference();
18224       c_local_lcp_sent_wait_all_complete_lcp_req = true;
18225       sendSignal(NDBCNTR_REF, GSN_WAIT_ALL_COMPLETE_LCP_REQ, signal, 1, JBB);
18226       return;
18227     }
18228   }
18229   jam();
18230   ndbrequire(c_local_lcp_sent_wait_all_complete_lcp_req);
18231   ndbrequire(!c_local_lcp_sent_wait_complete_conf);
18232   c_local_lcp_sent_wait_complete_conf = true;
18233   WaitCompleteLcpConf *conf =
18234     (WaitCompleteLcpConf*)signal->getDataPtrSend();
18235   conf->senderRef = reference();
18236   conf->lcpId = m_curr_lcp_id;
18237   conf->localLcpId = m_curr_local_lcp_id;
18238   conf->maxGciInLcp = c_max_gci_in_lcp;
18239   conf->maxKeepGci = c_max_keep_gci_in_lcp;
18240   sendSignal(NDBCNTR_REF, GSN_WAIT_COMPLETE_LCP_CONF, signal,
18241              WaitCompleteLcpConf::SignalLength, JBB);
18242 }
18243 
execWAIT_COMPLETE_LCP_REQ(Signal * signal)18244 void Dblqh::execWAIT_COMPLETE_LCP_REQ(Signal *signal)
18245 {
18246   /**
18247    * Check if we need to handle the case where an LDM have no
18248    * fragments defined. This could e.g. happen after a config change
18249    * where we have added more LDMs to a node.
18250    *
18251    * If this happens then we will still have
18252    * m_node_restart_first_local_lcp_started equal to false. In this
18253    * case we skip immediately to complete_local_lcp. Could
18254    * also happen since we configured to not use partial LCP.
18255    */
18256   c_local_lcp_sent_wait_complete_conf = false;
18257   c_local_lcp_sent_wait_all_complete_lcp_req = true;
18258   if (c_num_fragments_created_since_restart > 0)
18259   {
18260     if (!m_node_restart_first_local_lcp_started)
18261     {
18262       jam();
18263       c_saveLcpId = c_lcpId;
18264       complete_local_lcp(signal);
18265       return;
18266     }
18267     if (!c_full_local_lcp_started)
18268     {
18269       jam();
18270       /**
18271        * Normal path, no LCP was started due to UNDO log overload.
18272        * We still started a LCP and now that all fragments have
18273        * completed synchronisation we can complete the
18274        * local LCP and as soon as this is done we can continue
18275        * the restart processing.
18276        */
18277       send_lastLCP_FRAG_ORD(signal);
18278       return;
18279     }
18280   }
18281   if (c_localLcpId == 0)
18282   {
18283     jam();
18284     /**
18285      * We had an local LCP ordered due to UNDO log overload, this
18286      * have already completed LCP of all fragments, so we're ready to
18287      * continue.
18288      */
18289     c_local_lcp_sent_wait_complete_conf = true;
18290     signal->theData[0] = reference();
18291     WaitCompleteLcpConf *conf =
18292       (WaitCompleteLcpConf*)signal->getDataPtrSend();
18293     conf->senderRef = reference();
18294     conf->lcpId = m_curr_lcp_id;
18295     conf->localLcpId = m_curr_local_lcp_id;
18296     conf->maxGciInLcp = c_max_gci_in_lcp;
18297     conf->maxKeepGci = c_max_keep_gci_in_lcp;
18298     sendSignal(NDBCNTR_REF, GSN_WAIT_COMPLETE_LCP_CONF, signal,
18299                WaitCompleteLcpConf::SignalLength, JBB);
18300     return;
18301   }
18302   ndbrequire(c_num_fragments_created_since_restart > 0);
18303   /**
18304    * A local LCP was ordered due to UNDO log overload, this haven't
18305    * completed yet. So we need to wait until it is completed until
18306    * we proceed to next step.
18307    */
18308 }
18309 
send_lastLCP_FRAG_ORD(Signal * signal)18310 void Dblqh::send_lastLCP_FRAG_ORD(Signal *signal)
18311 {
18312   /**
18313    * To ensure that we reach the correct path we set lcpId equal to
18314    * c_lcpId here. It will later be restored to its original value
18315    * using c_saveLcpId.
18316    */
18317   DEB_LOCAL_LCP(("(%u)Send last LCP_FRAG_ORD, c_full_local_lcp_started: %u",
18318                  instance(),
18319                  c_full_local_lcp_started));
18320   LcpFragOrd *lcpFragOrd = (LcpFragOrd *)signal->getDataPtrSend();
18321   lcpFragOrd->tableId = RNIL;
18322   lcpFragOrd->fragmentId = RNIL;
18323   lcpFragOrd->lcpNo = 0;
18324   lcpFragOrd->lcpId = c_lcpId;
18325   lcpFragOrd->lastFragmentFlag = true;
18326   lcpFragOrd->keepGci = 0;
18327   sendSignal(reference(), GSN_LCP_FRAG_ORD, signal,
18328              LcpFragOrd::SignalLength, JBB);
18329 }
18330 
execWAIT_ALL_COMPLETE_LCP_CONF(Signal * signal)18331 void Dblqh::execWAIT_ALL_COMPLETE_LCP_CONF(Signal *signal)
18332 {
18333   /**
18334    * We have completed waiting for Local LCPs to complete in LDMs.
18335    * All LDMs will receive this, but it is not necessary that we
18336    * have any waiting fragment to activate, this could happen
18337    * either if the LDM is a new one or if the parallelism is lower
18338    * in the DIH than the number of LDMs.
18339    *
18340    * When this signal arrives we are certain that the LCP is
18341    * fully restorable since we also waited for the GCI to restore
18342    * before we sent this message. So we are also ready here to
18343    * cut the log tail of the UNDO log and the log tail of the
18344    * REDO log.
18345    */
18346   ndbrequire(c_local_lcp_sent_wait_complete_conf);
18347   c_local_lcp_sent_wait_complete_conf = false;
18348   ndbrequire(c_local_lcp_sent_wait_all_complete_lcp_req);
18349   c_local_lcp_sent_wait_all_complete_lcp_req = false;
18350   c_local_lcp_started = false;
18351   c_full_local_lcp_started = false;
18352   DEB_LOCAL_LCP(("(%u)All LDMs have completed local LCP", instance()));
18353   if (m_second_activate_fragment_ptr_i == RNIL)
18354   {
18355     jam();
18356     return;
18357   }
18358   fragptr.i = m_second_activate_fragment_ptr_i;
18359   m_second_activate_fragment_ptr_i = RNIL;
18360   c_fragment_pool.getPtr(fragptr);
18361   activate_redo_log(signal, fragptr.p->tabRef, fragptr.p->fragId);
18362 }
18363 
18364 void
start_lcp_on_table(Signal * signal)18365 Dblqh::start_lcp_on_table(Signal *signal)
18366 {
18367   while (c_current_local_lcp_table_id < ctabrecFileSize)
18368   {
18369     jam();
18370     tabptr.i = c_current_local_lcp_table_id;
18371     ptrAss(tabptr, tablerec);
18372     if ((tabptr.p->tableStatus == Tablerec::TABLE_DEFINED ||
18373          tabptr.p->tableStatus == Tablerec::TABLE_READ_ONLY) &&
18374         (!DictTabInfo::isOrderedIndex(tabptr.p->tableType)))
18375     {
18376       jam();
18377       for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tabptr.p->fragid); i++)
18378       {
18379         jam();
18380         if (tabptr.p->fragid[i] != ZNIL)
18381         {
18382           jam();
18383           fragptr.i = tabptr.p->fragrec[i];
18384           c_fragment_pool.getPtr(fragptr);
18385           ndbrequire(!handle_lcp_fragment_first_phase(signal));
18386         }
18387       }
18388       c_current_local_lcp_table_id++;
18389       signal->theData[0] = ZSTART_LOCAL_LCP;
18390       sendSignal(reference(), GSN_CONTINUEB, signal,
18391                  1, JBB);
18392       return;
18393     }
18394     c_current_local_lcp_table_id++;
18395   }
18396   /**
18397    * We're done starting all fragments, so now time to send
18398    * the LCP_FRAG_ORD with lastFragmentFlag set to true.
18399    */
18400   send_lastLCP_FRAG_ORD(signal);
18401 }
18402 
execCUT_REDO_LOG_TAIL_REQ(Signal * signal)18403 void Dblqh::execCUT_REDO_LOG_TAIL_REQ(Signal *signal)
18404 {
18405   Uint32 keepGci = signal->theData[0];
18406   setLogTail(signal, keepGci);
18407   sendSignal(NDBCNTR_REF, GSN_CUT_REDO_LOG_TAIL_CONF, signal,
18408              1, JBB);
18409 }
18410 
/**
 * Scan a batch of TC connection records, starting at tstartTcConnect,
 * looking for operations on fragment fragId that have not yet written
 * to the log.
 *
 * Every non-idle record that belongs to the fragment and has
 * logWriteState NOT_WRITTEN is moved to NOT_WRITTEN_WAIT and counted in
 * the fragment's activeTcCounter.
 *
 * If records remain after the batch, the scan continues in a new
 * CONTINUEB(ZSCAN_TC_CONNECT) signal. Otherwise activeTcCounter is
 * decremented once, and if it reaches zero the fragment's startGci is
 * set and COPY_ACTIVECONF is sent.
 *
 * NOTE(review): tabptr is used but never set in this function,
 * presumably the caller has set it up - confirm. fragptr is presumably
 * positioned by getFragmentrec() - confirm.
 *
 * @param signal           Signal object, reused for outgoing signals.
 * @param tstartTcConnect  Where to start (or continue) the scan.
 * @param fragId           Id of the fragment being handled.
 */
void Dblqh::scanTcConnectLab(Signal* signal, Uint32 tstartTcConnect, Uint32 fragId)
{
  ndbrequire(getFragmentrec(signal, fragId));
  TcConnectionrecPtr tcConnectptr;
  Uint32 next = tstartTcConnect;
  /* The loop budget of 200 bounds the work done in one signal. */
  for (Uint32 i = 0; i < 200; i++)
  {
    bool found = getNextTcConRec(next, tcConnectptr, 10);
    if (next != RNIL && !found)
    {
      jam();
      /**
       * No record returned, but the scan isn't at its end. Charge 10
       * extra to the loop budget.
       * NOTE(review): presumably because the call above examined up to
       * 10 records - confirm against getNextTcConRec.
       */
      i += 10;
      continue;
    }
    else if (next == RNIL)
    {
      jam();
      /* next == RNIL ends the scan. */
      break;
    }
    if (tcConnectptr.p->transactionState != TcConnectionrec::IDLE) {
      switch (tcConnectptr.p->logWriteState) {
      case TcConnectionrec::NOT_WRITTEN:
        jam();
        /* Only operations on the fragment in fragptr are of interest. */
        if (fragptr.i == tcConnectptr.p->fragmentptr) {
          jam();
          fragptr.p->activeTcCounter = fragptr.p->activeTcCounter + 1;
          tcConnectptr.p->logWriteState = TcConnectionrec::NOT_WRITTEN_WAIT;
        }//if
        break;
      default:
        jam();
        /*empty*/;
        break;
      }//switch
    }//if
  }//for
  if (next < RNIL)
  {
    jam();
    /* More records to scan, continue from next in a new signal. */
    signal->theData[0] = ZSCAN_TC_CONNECT;
    signal->theData[1] = next;
    signal->theData[2] = tabptr.i;
    signal->theData[3] = fragId;
    sendSignal(cownref, GSN_CONTINUEB, signal, 4, JBB);
  } else {
    jam();
/*------------------------------------------------------*/
/*       THE SCAN HAS BEEN COMPLETED. WE CHECK IF ALL   */
/*       OPERATIONS HAVE ALREADY BEEN COMPLETED.        */
/*------------------------------------------------------*/
    /**
     * NOTE(review): the counter is required to be above zero here,
     * presumably the scan itself holds one count - confirm.
     */
    ndbrequire(fragptr.p->activeTcCounter > 0);
    fragptr.p->activeTcCounter--;
    if (fragptr.p->activeTcCounter == 0) {
      jam();
/*------------------------------------------------------*/
/*       SET START GLOBAL CHECKPOINT TO THE NEXT        */
/*       CHECKPOINT WE HAVE NOT YET HEARD ANYTHING ABOUT*/
/*       THIS GCP WILL BE COMPLETELY COVERED BY THE LOG.*/
/*------------------------------------------------------*/
      fragptr.p->startGci = cnewestGci + 1;
      sendCopyActiveConf(signal, tabptr.i);
    }//if
  }//if
  return;
}//Dblqh::scanTcConnectLab()
18476 
18477 /* ========================================================================= */
18478 /* =======              INITIATE TC RECORD AT COPY FRAGMENT          ======= */
18479 /*                                                                           */
18480 /*       SUBROUTINE SHORT NAME = ICT                                         */
18481 /* ========================================================================= */
initCopyTc(Signal * signal,Operation_t op,TcConnectionrec * regTcPtr)18482 void Dblqh::initCopyTc(Signal* signal, Operation_t op, TcConnectionrec* regTcPtr)
18483 {
18484   regTcPtr->operation = ZREAD;
18485   regTcPtr->opExec = 0;	/* NOT INTERPRETED MODE */
18486   regTcPtr->schemaVersion = scanptr.p->scanSchemaVersion;
18487   Uint32 reqinfo = 0;
18488   LqhKeyReq::setDirtyFlag(reqinfo, 1);
18489   LqhKeyReq::setSimpleFlag(reqinfo, 1);
18490   LqhKeyReq::setOperation(reqinfo, op);
18491   LqhKeyReq::setGCIFlag(reqinfo, 1);
18492   LqhKeyReq::setNrCopyFlag(reqinfo, 1);
18493                                         /* AILen in LQHKEYREQ  IS ZERO */
18494   regTcPtr->reqinfo = reqinfo;
18495 /* ------------------------------------------------------------------------ */
18496 /* THE RECEIVING NODE WILL EXPECT THAT IT IS THE LAST NODE AND WILL         */
18497 /* SEND COMPLETED AS THE RESPONSE SIGNAL SINCE DIRTY_OP BIT IS SET.         */
18498 /* ------------------------------------------------------------------------ */
18499   regTcPtr->nodeAfterNext[0] = ZNIL;
18500   regTcPtr->nodeAfterNext[1] = ZNIL;
18501   regTcPtr->tcBlockref = cownref;
18502   regTcPtr->readlenAi = 0;
18503   regTcPtr->nextSeqNoReplica = 0;
18504   regTcPtr->dirtyOp = ZFALSE;
18505   regTcPtr->lastReplicaNo = 0;
18506   regTcPtr->currTupAiLen = 0;
18507   regTcPtr->tcTimer = cLqhTimeOutCount;
18508   regTcPtr->indTakeOver = ZFALSE;
18509 }//Dblqh::initCopyTc()
18510 
18511 /* ------------------------------------------------------------------------- */
18512 /* -------               SEND COPY_ACTIVECONF TO MASTER DIH          ------- */
18513 /*                                                                           */
18514 /* ------------------------------------------------------------------------- */
sendCopyActiveConf(Signal * signal,Uint32 tableId)18515 void Dblqh::sendCopyActiveConf(Signal* signal, Uint32 tableId)
18516 {
18517   releaseActiveCopy(signal);
18518   CopyActiveConf * const conf = (CopyActiveConf *)&signal->theData[0];
18519   conf->userPtr = fragptr.p->masterPtr;
18520   conf->tableId = tableId;
18521   conf->fragId = fragptr.p->fragId;
18522   conf->startingNodeId = cownNodeid;
18523   conf->startGci = fragptr.p->startGci;
18524   sendSignal(fragptr.p->masterBlockref, GSN_COPY_ACTIVECONF, signal,
18525              CopyActiveConf::SignalLength, JBB);
18526 }//Dblqh::sendCopyActiveConf()
18527 
18528 /**
18529  * Client side of HALT/RESUME Copy fragment
18530  * ----------------------------------------
18531  * This is executed by starting node when we are getting low
18532  * on space in UNDO log. The aim is to avoid getting error
18533  * 1501 when inserting a new row in the starting node. While
18534  * we are halted we will execute a local LCP to ensure that
18535  * more space is created in the UNDO log for disk data
18536  * pages.
18537  *
18538  * We discover that we need to halt the execution of copy
18539  * fragment in the starting node. When we receive this condition
18540  * we might have copy fragments ongoing. The variables
 * c_fragCopyTable and c_fragCopyFrag are set to RNIL when no
18542  * copy fragment is ongoing. Otherwise they point to the currently
18543  * active fragment being copied.
18544  *
 * The starting node sees the following flow of signals.
18546  *
18547  * 0) Before any copy started
18548  * Indicated by m_copy_started_state is AC_NORMAL, fragStatus is
18549  * FSACTIVE and m_copy_complete_flag is 0.
18550  *
18551  * 1) PREPARE_COPY_FRAGREQ
18552  * Sent before the live node gets the COPY_FRAGREQ signal.
18553  * Indicated by setting m_copy_started_state to AC_IGNORED and setting
18554  * fragStatus to ACTIVE_CREATION.
18555  *
18556  * 2) First LQHKEYREQ signal received (=> COPY_FRAGREQ received
18557  *    at live node).
18558  * Indicated by setting m_copy_started_state to AC_NR_COPY.
18559  *
18560  * 3) COPY_ACTIVEREQ received with CAR_NO_LOGGING and CAR_NO_WAIT set
 * Indicated by setting m_copy_started_state to AC_NORMAL, fragStatus to
 * FSACTIVE and m_copy_complete_flag to 1.
18563  *
18564  * If we find a copy fragment active it will be in either 1) or 2) above.
18565  * If all fragments are in either 0) or 3) then no active copy fragment
18566  * is ongoing.
18567  */
18568 void
send_halt_copy_frag(Signal * signal)18569 Dblqh::send_halt_copy_frag(Signal *signal)
18570 {
18571   ndbrequire(c_undo_log_overloaded);
18572   ndbrequire(!c_copy_frag_halt_process_locked);
18573   ndbrequire(!(c_copy_frag_halted &&
18574                c_copy_frag_halt_state == WAIT_RESUME_COPY_FRAG_CONF));
18575   ndbrequire(!(!c_copy_frag_halted &&
18576                c_copy_frag_halt_state == WAIT_HALT_COPY_FRAG_CONF));
18577   ndbassert(is_copy_frag_in_progress());
18578   if (c_fragCopyTable == RNIL)
18579   {
18580     jam();
18581     /**
18582      * No active checkpoint ongoing.
18583      * Set c_copy_frag_halted to true and c_copy_frag_halt_state to
18584      * COPY_FRAG_HALT_STATE_IDLE. Will halt copy fragment when receiving
18585      * PREPARE_COPY_FRAGREQ.
18586      */
18587     DEB_COPY(("(%u): Halted, no active copy", instance()));
18588     c_copy_frag_halted = true;
18589     c_copy_frag_halt_state = COPY_FRAG_HALT_STATE_IDLE;
18590     return;
18591   }
18592 
18593   tabptr.i = c_fragCopyTable;
18594   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
18595   ndbrequire(getFragmentrec(signal, c_fragCopyFrag));
18596 
18597   if (fragptr.p->m_copy_started_state == Fragrecord::AC_IGNORED)
18598   {
18599     jam();
18600     /**
18601      * State 1) above
18602      * We have received the PREPARE_COPY_FRAGREQ already, but we have
18603      * not yet received the first LQHKEYREQ yet. So we cannot be sure
18604      * that the starting node have received COPY_FRAGREQ yet. We
18605      * indicate that we wait for first LQHKEYREQ by setting
18606      * c_copy_frag_halted to true AND c_copy_frag_halt_state to
18607      * WAIT_FIRST_LQHKEYREQ.
18608      *
18609      * It is possible that not no first LQHKEYREQ arrives if no rows
18610      * are sent before the copy fragment of this fragment is
18611      * completed. In this case we will either see a new
18612      * PREPARE_COPY_FRAG_REQ arrive or we will see that the
18613      * first phase of copy fragment is completed.
18614      */
18615     DEB_COPY(("(%u): Halt when first LQHKEYREQ arrives, tab(%u,%u)",
18616               instance(),
18617               c_fragCopyTable,
18618               c_fragCopyFrag));
18619     c_copy_frag_halted = false;
18620     c_copy_frag_halt_state = COPY_FRAG_HALT_WAIT_FIRST_LQHKEYREQ;
18621     return;
18622   }
18623   ndbrequire(fragptr.p->m_copy_started_state == Fragrecord::AC_NR_COPY);
18624   jam();
18625   /**
18626    * We can be sure that the live node have received the COPY_FRAGREQ.
18627    * Send HALT_COPY_FRAG_REQ to live node to stop copy fragment process
18628    * temporarily.
18629    */
18630   DEB_COPY(("(%u): Halt copy fragment process in live node, tab(%u,%u)",
18631             instance(),
18632             c_fragCopyTable,
18633             c_fragCopyFrag));
18634   c_copy_frag_halted = false;
18635   c_copy_frag_halt_process_locked = true;
18636   c_copy_frag_halt_state = WAIT_HALT_COPY_FRAG_CONF;
18637   BlockReference ref = numberToRef(DBLQH,
18638                                    fragptr.p->lqhInstanceKey,
18639                                    c_prepare_copy_fragreq_save.copyNodeId);
18640 
18641   HaltCopyFragReq *req = (HaltCopyFragReq*)signal->getDataPtrSend();
18642   req->senderRef = reference();
18643   req->senderData = 0;
18644   req->tableId = fragptr.p->tabRef;
18645   req->fragmentId = fragptr.p->fragId;
18646   sendSignal(ref, GSN_HALT_COPY_FRAG_REQ, signal,
18647              HaltCopyFragReq::SignalLength, JBB);
18648 }
18649 
is_copy_frag_in_progress(void)18650 bool Dblqh::is_copy_frag_in_progress(void)
18651 {
18652   if (m_second_activate_fragment_ptr_i == RNIL &&
18653       c_copy_fragment_in_progress)
18654   {
18655     jam();
18656     return true;
18657   }
18658   return false;
18659 }
18660 
18661 void
execHALT_COPY_FRAG_CONF(Signal * signal)18662 Dblqh::execHALT_COPY_FRAG_CONF(Signal *signal)
18663 {
18664   HaltCopyFragConf *conf = (HaltCopyFragConf*)signal->getDataPtr();
18665   Uint32 cause = conf->cause;
18666   tabptr.i = conf->tableId;
18667   Uint32 fragId = conf->fragmentId;
18668   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
18669   ndbrequire(getFragmentrec(signal, fragId));
18670   c_copy_frag_halt_process_locked = false;
18671   if (cause == HaltCopyFragConf::COPY_FRAG_HALTED)
18672   {
18673     jam();
18674     ndbrequire(is_copy_frag_in_progress());
18675     DEB_COPY(("(%u)Halted copy fragment process in live node,"
18676               " tab(%u,%u)",
18677               instance(),
18678               tabptr.i,
18679               fragId));
18680     c_copy_frag_halted = true;
18681     c_copy_frag_halt_state = COPY_FRAG_IS_HALTED;
18682     if (!c_undo_log_overloaded)
18683     {
18684       jam();
18685       send_resume_copy_frag(signal);
18686     }
18687     return;
18688   }
18689   ndbrequire(cause == HaltCopyFragConf::COPY_FRAG_COMPLETED);
18690   /**
18691    * The copy fragment completed before we got to it. Let's restart
18692    * the halt process.
18693    */
18694   DEB_COPY(("(%u)Completed copy fragment process in live node"
18695             ", tab(%u,%u)",
18696             instance(),
18697             tabptr.i,
18698             fragId));
18699   ndbrequire(!c_copy_frag_halted);
18700   c_copy_frag_halt_state = COPY_FRAG_HALT_STATE_IDLE;
18701   if (c_undo_log_overloaded && is_copy_frag_in_progress())
18702   {
18703     jam();
18704     DEB_COPY(("(%u): Restart halt copy fragment process",
18705               instance()));
18706     send_halt_copy_frag(signal);
18707   }
18708   return;
18709 }
18710 
18711 void
send_resume_copy_frag(Signal * signal)18712 Dblqh::send_resume_copy_frag(Signal *signal)
18713 {
18714   ndbrequire(!c_undo_log_overloaded);
18715   ndbrequire(!c_copy_frag_halt_process_locked);
18716   ndbassert(is_copy_frag_in_progress());
18717   if (c_copy_frag_halted)
18718   {
18719     if (c_copy_frag_halt_state == COPY_FRAG_HALT_STATE_IDLE)
18720     {
18721       jam();
18722       /**
18723        * No need to do anything. We had not yet been able to halt any
18724        * copy fragment process. So simply continue after resetting
18725        * c_copy_frag_halted flag.
18726        */
18727       DEB_COPY(("(%u): Copy fragment process resumed, was idle",
18728                 instance()));
18729       c_copy_frag_halted = false;
18730       return;
18731     }
18732     else if (c_copy_frag_halt_state == COPY_FRAG_IS_HALTED)
18733     {
18734       jam();
18735       /**
18736        * The live node has halted its copy fragment scan. We need to
18737        * resume the copy fragment scan again.
18738        * Only after receiving RESUME_COPY_FRAG_CONF are we able to
18739        * reset the halt state flag.
18740        */
18741       tabptr.i = c_fragCopyTable;
18742       Uint32 fragId = c_fragCopyFrag;
18743       ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
18744       ndbrequire(getFragmentrec(signal, fragId));
18745 
18746       DEB_COPY(("(%u): Send RESUME_COPY_FRAG_REQ, tab(%u,%u)",
18747                 instance(),
18748                 fragptr.p->tabRef,
18749                 fragptr.p->fragId));
18750       c_copy_frag_halt_process_locked = true;
18751       BlockReference ref = numberToRef(DBLQH,
18752                                        fragptr.p->lqhInstanceKey,
18753                                        c_prepare_copy_fragreq_save.copyNodeId);
18754       c_copy_frag_halt_state = WAIT_RESUME_COPY_FRAG_CONF;
18755       ResumeCopyFragReq *req = (ResumeCopyFragReq*)signal->getDataPtrSend();
18756       req->senderRef = reference();
18757       req->senderData = 0;
18758       req->tableId = fragptr.p->tabRef;
18759       req->fragmentId = fragptr.p->fragId;
18760       sendSignal(ref, GSN_RESUME_COPY_FRAG_REQ, signal,
18761                  HaltCopyFragReq::SignalLength, JBB);
18762       return;
18763     }
18764     else if (c_copy_frag_halt_state == PREPARE_COPY_FRAG_IS_HALTED)
18765     {
18766       jam();
18767       Uint32 tableId = c_prepare_copy_fragreq_save.tableId;
18768       Uint32 fragId = c_prepare_copy_fragreq_save.fragId;
18769       DEB_COPY(("(%u): Resume PREPARE_COPY_FRAGREQ, tab(%u,%u)",
18770                 instance(),
18771                 tableId,
18772                 fragId));
18773       Uint32 max_page;
18774       tabptr.i = tableId;
18775       ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
18776       ndbrequire(getFragmentrec(signal, fragId));
18777       Uint32 completedGci = fragptr.p->m_completed_gci;
18778       c_tup->get_frag_info(tableId, fragId, &max_page);
18779       send_prepare_copy_frag_conf(signal,
18780                                   c_prepare_copy_fragreq_save,
18781                                   completedGci,
18782                                   max_page);
18783       c_copy_frag_halted = false;
18784       c_copy_frag_halt_state = COPY_FRAG_HALT_STATE_IDLE;
18785       return;
18786     }
18787     else
18788     {
18789       jamLine(Uint16(c_copy_frag_halt_state));
18790       ndbabort();
18791       return; //Compiler silencer
18792     }
18793   }
18794   else if (c_copy_frag_halt_state == COPY_FRAG_HALT_WAIT_FIRST_LQHKEYREQ)
18795   {
18796     jam();
18797     /**
18798      * No need to do anything. We had not yet been able to halt any
18799      * copy fragment process. So simply continue after resetting to
18800      * an idle state again.
18801      */
18802     c_copy_frag_halted = false;
18803     DEB_COPY(("(%u): Resumed, was still waiting for LQHKEYREQ",
18804               instance()));
18805     c_copy_frag_halt_state = COPY_FRAG_HALT_STATE_IDLE;
18806     return;
18807   }
18808   else
18809   {
18810     jamLine(c_copy_frag_halt_state);
18811     ndbabort();
18812   }
18813 }
18814 
18815 void
execRESUME_COPY_FRAG_CONF(Signal * signal)18816 Dblqh::execRESUME_COPY_FRAG_CONF(Signal *signal)
18817 {
18818   jamEntry();
18819   jamLine(Uint16(c_copy_frag_halt_state));
18820   c_copy_frag_halted = false;
18821   c_copy_frag_halt_process_locked = false;
18822   c_copy_frag_halt_state = COPY_FRAG_HALT_STATE_IDLE;
18823   DEB_COPY(("(%u) execRESUME_COPY_FRAG_CONF, tab(%u,%u)",
18824             instance(),
18825             c_fragCopyTable,
18826             c_fragCopyFrag));
18827   if (c_undo_log_overloaded && is_copy_frag_in_progress())
18828   {
18829     jam();
18830     /**
18831      * UNDO log is overloaded again. We need to halt it again.
18832      */
18833     DEB_COPY(("(%u): Need to halt again", instance()));
18834     send_halt_copy_frag(signal);
18835     return;
18836   }
18837   else
18838   {
18839     jam();
18840     /**
18841      * Normal path, the UNDO isn't overloaded anymore and we have
18842      * resumed normal operation. We have already set the correct
18843      * state, so we can simply return and the copy processes will
18844      * continue as normal. The resume was done by the live node.
18845      */
18846     DEB_COPY(("(%u): Resumed copy fragment, tab(%u,%u)",
18847               instance(),
18848               c_fragCopyTable,
18849               c_fragCopyFrag));
18850     return;
18851   }
18852 }
18853 
18854 /**
18855  * LGMAN UNDO log level reporting
18856  * ------------------------------
18857  * Every time LGMAN reaches some predefined levels it will
18858  * send information of the level change to all LQHs. This will
18859  * enable LQH to do various things to minimize the risk of
18860  * running out of UNDO log. During a restart we can stop
18861  * temporarily the synchronisation process. During normal
18862  * operation we can increase the speed of writing local
18863  * checkpoints.
18864  */
18865 void
execUNDO_LOG_LEVEL_REP(Signal * signal)18866 Dblqh::execUNDO_LOG_LEVEL_REP(Signal *signal)
18867 {
18868 #define OVERLOAD_LEVEL 90
18869   UndoLogLevelRep *rep = (UndoLogLevelRep*)signal->getDataPtr();
18870   Uint32 levelUsed = rep->levelUsed;
18871 
18872   c_backup->set_undo_log_level(levelUsed);
18873 
18874   DEB_LOCAL_LCP_EXTRA(("(%u)UNDO_LOG_LEVEL: %u percent, copy in progress: %u",
18875                        instance(),
18876                        levelUsed,
18877                        c_copy_fragment_in_progress));
18878   if (c_copy_fragment_in_progress)
18879   {
18880     if (levelUsed >= OVERLOAD_LEVEL)
18881     {
18882       if (c_undo_log_overloaded)
18883       {
18884         jam();
18885         /**
18886          * Nothing new, we are already set to overload state, no
18887          * need to report it again until we have dipped into
18888          * normal state again for a while.
18889          */
18890       }
18891       else
18892       {
18893         jam();
18894         /**
18895          * We have reached an overload state. We need to
18896          * halt copy fragment process. In addition we also
18897          * need to ensure that we run a complete local LCP
18898          * before we can continue again since this is the
18899          * only method to bring down the UNDO log level.
18900          */
18901         c_undo_log_overloaded = true;
18902         if (!c_copy_frag_halt_process_locked)
18903         {
18904           jam();
18905           send_halt_copy_frag(signal);
18906         }
18907       }
18908     }
18909     else
18910     {
18911       if (!c_undo_log_overloaded)
18912       {
18913         jam();
18914         /**
18915          * Nothing new, we are already set to normal state, no
18916          * need to report it again until we have dipped into
18917          * overload state for a while.
18918          */
18919       }
18920       else
18921       {
18922         jam();
18923         c_undo_log_overloaded = false;
18924         if (!c_copy_frag_halt_process_locked)
18925         {
18926           jam();
18927           send_resume_copy_frag(signal);
18928         }
18929       }
18930     }
18931   }
18932 }
18933 
18934 /**
18935  * Server side of HALT/RESUME Copy fragment
18936  * ----------------------------------------
18937  * This is executed on the live node that is copying the rows to the
18938  * starting node.
18939  */
18940 void
execHALT_COPY_FRAG_REQ(Signal * signal)18941 Dblqh::execHALT_COPY_FRAG_REQ(Signal *signal)
18942 {
18943   HaltCopyFragReq *req = (HaltCopyFragReq*)signal->getDataPtr();
18944   c_halt_copy_fragreq_save = *req;
18945 
18946   if (c_tc_connect_rec_copy_frag == RNIL)
18947   {
18948     jamEntry();
18949     /**
18950      * No active copy fragment, obviously copy has been completed
18951      * already, we will not arrive here unless there was an active
18952      * copy fragment going on. So if none is active anymore it
18953      * means we've already completed the copy. We return immediately.
18954      */
18955     DEB_COPY(("(%u):HALT_COPY_FRAG_REQ: no active copy",
18956                    instance()));
18957     send_halt_copy_frag_conf(signal, true);
18958     return;
18959   }
18960   jamEntry();
18961   /**
18962    * Active copy fragment found, we will wait for it to be fully
18963    * halted before responding. When successfully halted it we
18964    * will respond, we will also respond if not able to halt it
18965    * before it was completed.
18966    */
18967   DEB_COPY(("(%u):HALT_COPY_FRAG_REQ: start halting",
18968             instance()));
18969   c_copy_frag_live_node_performing_halt = true;
18970   c_copy_frag_live_node_halted = false;
18971 }
18972 
18973 void
send_halt_copy_frag_conf(Signal * signal,bool completed)18974 Dblqh::send_halt_copy_frag_conf(Signal *signal, bool completed)
18975 {
18976   HaltCopyFragConf *conf= (HaltCopyFragConf*)signal->getDataPtrSend();
18977   conf->cause = completed ?
18978                 HaltCopyFragConf::COPY_FRAG_COMPLETED :
18979                 HaltCopyFragConf::COPY_FRAG_HALTED;
18980 
18981   conf->senderData = c_halt_copy_fragreq_save.senderData;
18982   conf->tableId = c_halt_copy_fragreq_save.tableId;
18983   conf->fragmentId = c_halt_copy_fragreq_save.fragmentId;
18984   sendSignal(c_halt_copy_fragreq_save.senderRef,
18985              GSN_HALT_COPY_FRAG_CONF,
18986              signal,
18987              HaltCopyFragConf::SignalLength,
18988              JBB);
18989 }
18990 
/**
 * RESUME_COPY_FRAG_REQ: request to resume a halted copy fragment.
 *
 * Requires that the copy fragment is halted (and not in the middle of
 * being halted) and that c_tc_connect_rec_copy_frag refers to a record.
 * RESUME_COPY_FRAG_CONF is sent first, after that the copy is
 * continued by calling nextRecordCopy().
 */
void
Dblqh::execRESUME_COPY_FRAG_REQ(Signal *signal)
{
  jamEntry();
  ndbrequire(c_copy_frag_live_node_halted);
  ndbrequire(!c_copy_frag_live_node_performing_halt);
  ndbrequire(c_tc_connect_rec_copy_frag != RNIL);
  c_copy_frag_live_node_halted = false;
  DEB_COPY(("(%u):RESUME_COPY_FRAG_REQ received", instance()));

  /**
   * The CONF is built from the request data still in the signal, so
   * it is sent before the signal object is handed on below.
   */
  send_resume_copy_frag_conf(signal);

  /**
   * Resume copy fragment process by reissuing nextRecordCopy
   */
  TcConnectionrecPtr tcConnectptr;
  tcConnectptr.i = c_tc_connect_rec_copy_frag;
  ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
  /* NOTE(review): presumably this positions scanptr - confirm. */
  setup_scan_pointers_from_tc_con(tcConnectptr);
  ndbrequire(tcConnectptr.p->copyCountWords == 0);
  /* The scan must be in halted state, it now waits for LQHKEY again. */
  ndbrequire(scanptr.p->scanState == ScanRecord::COPY_FRAG_HALTED);
  scanptr.p->scanState = ScanRecord::WAIT_LQHKEY_COPY;
  nextRecordCopy(signal, tcConnectptr);
}
19015 
19016 void
send_resume_copy_frag_conf(Signal * signal)19017 Dblqh::send_resume_copy_frag_conf(Signal *signal)
19018 {
19019   ResumeCopyFragReq req = *(ResumeCopyFragReq*)signal->getDataPtr();
19020   ResumeCopyFragConf *conf= (ResumeCopyFragConf*)signal->getDataPtrSend();
19021   conf->senderData = req.senderData;
19022   conf->tableId = req.tableId;
19023   conf->fragmentId = req.fragmentId;
19024   sendSignal(req.senderRef, GSN_RESUME_COPY_FRAG_CONF, signal,
19025              ResumeCopyFragConf::SignalLength, JBB);
19026 }
19027 
19028 /* ##########################################################################
19029  * #######                       LOCAL CHECKPOINT MODULE              #######
19030  *
19031  * ##########################################################################
19032  * --------------------------------------------------------------------------
 *  THIS MODULE HANDLES THE EXECUTION AND CONTROL OF LOCAL CHECKPOINTS.
 *  IT CONTROLS THE LOCAL CHECKPOINTS IN TUP AND ACC. IT ALSO INTERACTS
 *  WITH DIH TO CONTROL WHICH GLOBAL CHECKPOINTS ARE RECOVERABLE.
19036  *
19037  * We can prepare a fragment checkpoint while we are executing another
19038  * fragment checkpoint. The reason for this is to make sure that we have
19039  * quick progress even with many small fragments.
19040  *
19041  * Preparing a fragment for checkpoint execution means opening a header file
19042  * for the fragment and then opening a new file to contain the data from this
19043  * checkpoint. To perform a restore one might have to execute several
19044  * checkpoints from the oldest to the newest. How to perform recovery is
19045  * found in the fragment checkpoint header file.
19046  *
19047  * There is also a background process after completing the fragment checkpoint
19048  * performed by the BACKUP block. This background process will delete old
19049  * checkpoint files to ensure that we don't run out of file space. This
19050  * process might be interrupted by a crash, it will however be completed
19051  * next time the fragment is checkpointed.
19052  * ------------------------------------------------------------------------- */
19053 
19054 void
force_lcp(Signal * signal)19055 Dblqh::force_lcp(Signal* signal)
19056 {
19057   /* If there is a system or node restart in progress,
19058    * request an lcp to be triggered when the restart completes
19059    * without waiting for transaction load or
19060    * expiry of TimeBetweenLocalCheckpoints,
19061    * in order to reduce the redo log handling during any
19062    * potential multi-node crashes and ensure the recoverability.
19063    */
19064   if (!getNodeState().getSystemRestartInProgress() &&
19065       !getNodeState().getNodeRestartInProgress() &&
19066       cLqhTimeOutCount == c_last_force_lcp_time)
19067   {
19068     jam();
19069     return;
19070   }
19071 
19072   c_last_force_lcp_time = cLqhTimeOutCount;
19073   signal->theData[0] = DumpStateOrd::DihStartLcpImmediately;
19074   sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
19075 }
19076 
19077 /**
19078  * At this point DBDIH master is about to start the next
19079  * distributed LCP. At this point DIH has locked the
19080  * meta data, so no table creations are in process at
 * this time. So all tables that exist now will be part of
19082  * this LCP. New tables created after this point will show
19083  * up here with LQHFRAGREQ signals and for each fragment
19084  * the LCP will execute it will call lcp_max_completed_gci.
19085  * So we set it to 0 here to indicate the minimum is not yet
19086  * set. After that we will call the set_min_keep_gci
19087  * function each time we come to LQHFRAGREQ and coming to
19088  * lcp_max_completed_gci.
19089  *
19090  * We only receive this signal when we are participating in
19091  * the distributed LCP to avoid messing things up for local LCP
19092  * execution.
19093  *
19094  * We keep the gci - 1 from here as well just to verify that
19095  * the keepGci isn't set before this GCI, this would indicate
19096  * some severe problem of our understanding of the code.
19097  */
execSTART_NODE_LCP_REQ(Signal * signal)19098 void Dblqh::execSTART_NODE_LCP_REQ(Signal *signal)
19099 {
19100   jamEntry();
19101 #ifdef DEBUG_LCP
19102   Uint32 current_gci = signal->theData[0];
19103   Uint32 backup_restorable_gci = c_backup->getRestorableGci();
19104 #endif
19105   Uint32 restorable_gci = signal->theData[1];
19106   c_keep_gci_for_lcp = restorable_gci;
19107   DEB_LCP(("(%u)c_keep_gci_for_lcp = %u,"
19108            " current_gci = %u, restorable_gci = %u"
19109            ", cnewestCompletedGci = %u, "
19110            "backup_restorable_gci = %u",
19111             instance(),
19112             c_keep_gci_for_lcp,
19113             current_gci,
19114             restorable_gci,
19115             cnewestCompletedGci,
19116             backup_restorable_gci));
19117   c_max_keep_gci_in_lcp = c_keep_gci_for_lcp;
19118   c_first_set_min_keep_gci = true;
19119   BlockReference ref;
19120   if (isNdbMtLqh())
19121   {
19122     jam();
19123     ref = DBLQH_REF;
19124   }
19125   else
19126   {
19127     jam();
19128     ref = DBDIH_REF;
19129   }
19130   signal->theData[0] = 1;
19131   sendSignal(ref, GSN_START_NODE_LCP_CONF, signal, 1, JBB);
19132 
19133   if (getNodeState().startLevel >= NodeState::SL_STOPPING_4)
19134   {
19135     /**
19136      * The restorable_gci is not restorable in our node,
19137      * so don't update Backup's view of restorable GCI
19138      * at this time since that would create an LCP that
19139      * isn't restorable.
19140      *
19141      * By not updating the restorable GCI in Backup we
19142      * ensure that the LCP won't complete if any updates
19143      * have occurred in the LCP. Thus we don't risk that
19144      * we overwrite all restorable LCP files.
19145      */
19146     jam();
19147     return;
19148   }
19149   if (cstartPhase != ZNIL)
19150   {
19151     jam();
19152     /**
19153      * The node is not yet complete with its restart.
19154      * So we cannot yet guarantee that the restorable
19155      * GCI is restorable in this node even if it is
19156      * restorable in the cluster.
19157      */
19158     return;
19159   }
19160   jam();
19161   signal->theData[0] = restorable_gci;
19162   EXECUTE_DIRECT(BACKUP, GSN_RESTORABLE_GCI_REP, signal, 1);
19163 }
19164 
set_min_keep_gci(Uint32 max_completed_gci)19165 void Dblqh::set_min_keep_gci(Uint32 max_completed_gci)
19166 {
19167   if (c_first_set_min_keep_gci)
19168   {
19169     jam();
19170     c_first_set_min_keep_gci = false;
19171     c_max_keep_gci_in_lcp = max_completed_gci;
19172     DEB_LCP(("(%u)First: c_max_keep_gci_in_lcp = %u",
19173               instance(), c_max_keep_gci_in_lcp));
19174   }
19175   else if (c_max_keep_gci_in_lcp > max_completed_gci)
19176   {
19177     jam();
19178     c_max_keep_gci_in_lcp = max_completed_gci;
19179     DEB_LCP(("(%u)c_max_keep_gci_in_lcp = %u",
19180               instance(), c_max_keep_gci_in_lcp));
19181   }
19182 }
19183 
/**
 * Handle LCP_FRAG_ORD: an order to checkpoint one fragment as part of
 * the LCP identified by lcpId, or (when lastFragmentFlag is set) the
 * marker that no more fragment orders follow for this LCP.
 *
 * Outline:
 * - The first order carrying a new lcpId resets the per-LCP counters,
 *   distinguishes a distributed LCP (sender is another block) from a
 *   local LCP (sender is this block), presets currentPrepareFragment
 *   and informs LGMAN when a logfile group exists.
 * - Orders arriving after the last fragment marker, or while the LCP
 *   is idle, are dropped (only expected in ndbd).
 * - The last fragment marker completes the LCP round at once if the
 *   LCP is idle.
 * - An order for a table that is no longer defined is answered
 *   directly with LCP_FRAG_REP.
 * - Fragments already touched by the CREATE TABLE path
 *   (m_create_table_flag_lcp_frag_ord, LCP_EXECUTED_BY_CREATE_TABLE)
 *   get special treatment so they are not queued or reported twice.
 * - A duplicate order for the same lcpId is dropped; in ndbmtd a one
 *   word LCP_FRAG_REP is sent to DBLQH_REF so the proxy can adjust its
 *   outstanding counter.
 * - Otherwise the fragment is queued and, if no prepare is in
 *   progress, preparation of the next fragment checkpoint is started.
 */
void Dblqh::execLCP_FRAG_ORD(Signal* signal)
{
  jamEntry();
  CRASH_INSERTION(5010);

  /**
   * Work on a private copy of the order: the signal buffer is written
   * again further down (theData[0] and the LGMAN order) while fields of
   * the order are still needed.
   */
  LcpFragOrd lcpFragOrdCopy = * (LcpFragOrd *)&signal->theData[0];
  LcpFragOrd * lcpFragOrd = &lcpFragOrdCopy;

  Uint32 lcpId = lcpFragOrd->lcpId;

  lcpPtr.i = 0;
  ptrAss(lcpPtr, lcpRecord);

  if (c_lcpId != lcpFragOrd->lcpId)
  {
    /* First LCP_FRAG_ORD of a new LCP. */
    jam();

    lcpPtr.p->firstFragmentFlag= true;
    c_max_gci_in_lcp = 0;
    c_fragments_in_lcp = 0;

#ifdef ERROR_INSERT
    if (check_ndb_versions())
    {
      /**
       * Only (so-far) in error insert
       *   check that keepGci (tail of REDO) is smaller than of head of REDO
       *
       */
      if (! ((cnewestCompletedGci >= lcpFragOrd->keepGci) &&
             (cnewestGci >= lcpFragOrd->keepGci)))
      {
        ndbout_c("lcpFragOrd->keepGci: %u cnewestCompletedGci: %u cnewestGci: %u",
                 lcpFragOrd->keepGci, cnewestCompletedGci, cnewestGci);
      }
      ndbrequire(cnewestCompletedGci >= lcpFragOrd->keepGci);
      ndbrequire(cnewestGci >= lcpFragOrd->keepGci);
    }
#endif

    c_lcpId = lcpFragOrd->lcpId;
    ndbrequire(is_lcp_idle(lcpPtr.p));
    if (signal->getSendersBlockRef() != reference())
    {
      /* Order came from another block: this is a distributed LCP. */
      jam();
      if (c_lcpId > m_curr_lcp_id)
      {
        jam();
        m_curr_lcp_id = c_lcpId;
        m_curr_local_lcp_id = 0;
      }
      else
      {
        /* Same distributed LCP id seen again: step the local LCP id. */
        m_curr_local_lcp_id++;
        DEB_LCP(("(%u)Starting another distributed LCP with same id,"
                 " stepping up local LCP id, LCP(%u,%u)",
                 instance(),
                 m_curr_lcp_id,
                 m_curr_local_lcp_id));
        ndbrequire(c_lcpId == m_curr_lcp_id);
      }
      g_eventLogger->debug("(%u)Starting distributed LCP(%u,%u)",
                          instance(),
                          m_curr_lcp_id,
                          m_curr_local_lcp_id);
      m_first_distributed_lcp_started = true;
      signal->theData[0] = c_lcpId;
      sendSignal(NDBCNTR_REF, GSN_START_DISTRIBUTED_LCP_ORD, signal, 1, JBB);
    }
    else
    {
      /* Order was sent by this block itself: this is a local LCP. */
      g_eventLogger->info("(%u)Starting local LCP(%u,%u)",
                          instance(),
                          m_curr_lcp_id,
                          m_curr_local_lcp_id);
      ndbrequire(lcpFragOrd->keepGci == 0);
    }
    ndbrequire(clcpCompletedState == LCP_IDLE);
    clcpCompletedState = LCP_RUNNING;

    /**
     * We preset some variables that will stay the same for the entire
     * LCP execution.
     */
    lcpPtr.p->currentPrepareFragment.lcpFragOrd.lcpId = c_lcpId;
    lcpPtr.p->currentPrepareFragment.lcpFragOrd.keepGci = lcpFragOrd->keepGci;
    lcpPtr.p->currentPrepareFragment.lcpFragOrd.lastFragmentFlag = FALSE;
    /* These should be set before each LCP fragment execution */
    lcpPtr.p->currentPrepareFragment.lcpFragOrd.tableId = RNIL;
    lcpPtr.p->currentPrepareFragment.lcpFragOrd.fragmentId = RNIL;
    lcpPtr.p->currentPrepareFragment.lcpFragOrd.lcpNo = RNIL;

    /**
     * Fragments already sitting in the queue when the LCP starts mark
     * the LCP as needing the early LCP synchronisation.
     */
    if (!c_queued_lcp_frag_ord.isEmpty())
    {
      jam();
      lcpPtr.p->m_early_lcps_need_synch = true;
    }
    else
    {
      jam();
      lcpPtr.p->m_early_lcps_need_synch = false;
    }
    ndbrequire(lcpPtr.p->m_wait_early_lcp_synch == false);
    {
      /* Tell LGMAN about the new LCP when a logfile group exists. */
      Logfile_client lgman(this, c_lgman, 0);
      if (lgman.exists_logfile_group())
      {
        jam();
        LcpFragOrd *ord = (LcpFragOrd*)signal->getDataPtr();
        ord->tableId = 0;
        ord->fragmentId = 0;
        ord->lcpId = c_lcpId;
        lgman.exec_lcp_frag_ord(signal, get_current_local_lcp_id());
      }
    }
  }
  else
  {
    jam();
    ndbrequire(c_lcpId == lcpFragOrd->lcpId);
    if (lcpPtr.p->lastFragmentFlag || clcpCompletedState == LCP_IDLE)
    {
      jam();
      /**
       * Drop any message received after LCP_FRAG_ORD with last fragment
       * marker, must be ndbd we're running since Proxy should handle this.
       * Can happen after a master takeover.
       *
       * DIH doesn't keep track of number of outstanding messages, so
       * no need to do anything when receiving multiple LCP_FRAG_ORDs
       * that are discarded.
       */
      ndbrequire(!isNdbMtLqh());
      return;
    }
  }

  if (lcpFragOrd->lastFragmentFlag)
  {
    jam();
    lcpPtr.p->lastFragmentFlag = true;
    DEB_LCP(("(%u)Received last fragment flag", instance()));
    CRASH_INSERTION(5054);
    if (is_lcp_idle(lcpPtr.p))
    {
      jam();
      /* ----------------------------------------------------------
       *       NOW THE COMPLETE LOCAL CHECKPOINT ROUND IS COMPLETED.
       * -------------------------------------------------------- */
      completeLcpRoundLab(signal, lcpId);
    }
    return;
  }//if

  /* From here on the order refers to a specific fragment. */
  c_fragments_in_lcp++;
  tabptr.i = lcpFragOrd->tableId;
  ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);

  if (unlikely(tabptr.p->tableStatus != Tablerec::TABLE_DEFINED &&
               tabptr.p->tableStatus != Tablerec::TABLE_READ_ONLY))
  {
    /**
     * There is no way to discover if we had multiple messages for this
     * since the table is already deleted and we don't keep information
     * about it anymore. Should not be a problem since the signal is
     * likely to be dropped somewhere and an extra LCP_FRAG_REP to a
     * dropped table will simply be dropped again in DBDIH.
     */
    jam();
    ndbrequire(c_localLcpId == 0);
    LcpRecord::FragOrd fragOrd;
    fragOrd.lcpFragOrd = * lcpFragOrd;

    /**
     * No fragment record is available for the dropped table, so a
     * temporary one carrying only the GCI values is used for the
     * report.
     */
    Fragrecord tmp;
    tmp.maxGciInLcp = cnewestGci;
    tmp.maxGciCompletedInLcp = cnewestCompletedGci;
    sendLCP_FRAG_REP(signal, fragOrd, &tmp);
    return;
  }

  ndbrequire(getFragmentrec(signal, lcpFragOrd->fragmentId));
  bool add_to_queue = true;
  if (fragptr.p->m_create_table_flag_lcp_frag_ord)
  {
    jam();
    fragptr.p->m_create_table_flag_lcp_frag_ord = false;
    ndbrequire(fragptr.p->lcp_frag_ord_state == Fragrecord::LCP_QUEUED ||
               fragptr.p->lcp_frag_ord_state == Fragrecord::LCP_EXECUTING);
    if (!is_lcp_idle(lcpPtr.p))
    {
      jam();
      /**
       * Checkpoint has already started, we only need to record the fact that
       * this table has received an order to execute fragment as part of LCP.
       *
       * LCP_QUEUED:
       * -----------
       * Fragment have been inserted into the LCP list already. But it is still
       * in the queue. We will remove it from queue and insert it as for a normal
       * fragment and also reset the variables from the CREATE TABLE insert.
       * LCP_EXECUTING:
       * --------------
       * We are already in the process of executing this checkpoint.
       * We cannot have received any LCP_FRAG_ORD previous to this
       * since any such signal would clear the m_create_table_flag_lcp_frag_ord
       * flag. Thus we only need to update the fragment record to ensure
       * it contains the necessary information received in the LCP_FRAG_ORD
       * signal.
       */
      jam();
      fragptr.p->lcp_frag_ord_lcp_no = lcpFragOrd->lcpNo;
      fragptr.p->lcp_frag_ord_lcp_id = lcpFragOrd->lcpId;
      cnoOfFragsCheckpointed++;
      /**
       * NOTE(review): c_fragments_in_lcp was already incremented once
       * for this signal above - confirm the second increment on this
       * path is intended.
       */
      c_fragments_in_lcp++;
      DEB_EARLY_LCP(("(%u)LCP_FRAG_ORD for tab(%u,%u), clear flag",
                     instance(),
                     fragptr.p->tabRef,
                     fragptr.p->fragId));
      return;
    }
    DEB_EARLY_LCP(("(%u)LCP_FRAG_ORD for tab(%u,%u), clear flag and start",
                   instance(),
                   fragptr.p->tabRef,
                   fragptr.p->fragId));
    ndbrequire(fragptr.p->lcp_frag_ord_state == Fragrecord::LCP_QUEUED);
    /**
     * The fragment is already in the LCP queue (state LCP_QUEUED), so
     * it must not be added a second time by the common code below.
     */
    add_to_queue = false;
  }
  if (fragptr.p->lcp_frag_ord_state ==
        Fragrecord::LCP_EXECUTED_BY_CREATE_TABLE)
  {
    jam();
    if (fragptr.p->lcp_frag_ord_lcp_no == lcpFragOrd->lcpNo &&
        fragptr.p->lcp_frag_ord_lcp_id == lcpFragOrd->lcpId)
    {
      /**
       * We already executed a checkpoint as part of this LCP. Send
       * an LCP_FRAG_REP based on this information.
       */
      jam();
      DEB_EARLY_LCP(("(%u)LCP_FRAG_ORD for tab(%u,%u), already executed",
                     instance(),
                     fragptr.p->tabRef,
                     fragptr.p->fragId));
      LcpRecord::FragOrd fragOrd;
      fragOrd.lcpFragOrd.lcpNo = fragptr.p->lcp_frag_ord_lcp_no;
      fragOrd.lcpFragOrd.lcpId = fragptr.p->lcp_frag_ord_lcp_id;
      fragOrd.lcpFragOrd.fragmentId = fragptr.p->fragId;
      fragOrd.lcpFragOrd.tableId = fragptr.p->tabRef;
      fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_EXECUTED;
      sendLCP_FRAG_REP(signal, fragOrd, fragptr.p);
      ndbrequire(lcpPtr.p->lastFragmentFlag == false);
      /**
       * NOTE(review): second increment of c_fragments_in_lcp for this
       * signal (see the unconditional increment above) - confirm.
       */
      c_fragments_in_lcp++;
      return;
    }
    DEB_EARLY_LCP(("(%u)LCP_FRAG_ORD for tab(%u,%u), executed, new LCP",
                   instance(),
                   fragptr.p->tabRef,
                   fragptr.p->fragId));
    /**
     * This is the next LCP, continue as normal and set the state to the
     * expected state at this point.
     */
    fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_EXECUTED;
    ndbrequire(fragptr.p->m_create_table_flag_lcp_frag_ord == false);
    ndbrequire(fragptr.p->m_create_table_insert_lcp == false);
  }

  if (fragptr.p->lcp_frag_ord_lcp_id == lcpFragOrd->lcpId &&
      c_localLcpId == 0)
  {
    /**
     * The LCP_FRAG_ORD have already been received, we need to send a report
     * back to the Proxy for ndbmtd to keep the outstanding counter up-to-date.
     * For ndbd we can simply drop the signal.
     */
    jam();
    if (!isNdbMtLqh())
    {
      jam();
      return;
    }
    /**
     * This signal is identified by its length, it will be used to decrease
     * the number of outstanding LCP_FRAG_ORD operations to the LQH instances.
     * From a modular point of view DBLQH will always drop this LCP_FRAG_ORD
     * since it already received it. In the case of ndbmtd we do however need
     * to ensure that DBLQH proxy also knows about the drop since it keeps
     * track of the number of outstanding LCP_FRAG_ORDs. When DBLQH proxy
     * receives this signal it will update the counter and then drop the
     * signal. So no signal will be sent to DBDIH in any case.
     */
    sendSignal(DBLQH_REF, GSN_LCP_FRAG_REP, signal, 1, JBA);
    return;
  }

  /**
   * Add the fragment to the queue of LCP_FRAG_ORDs.
   * We need to store lcpId as a flag that we received an
   * LCP_FRAG_ORD for this LCP, we need the lcpNo for
   * later when executing the LCP and we need the state
   * to indicate if we have completed the LCP yet which
   * is needed for drop table.
   */
  fragptr.p->lcp_frag_ord_lcp_no = lcpFragOrd->lcpNo;
  fragptr.p->lcp_frag_ord_lcp_id = lcpFragOrd->lcpId;
  fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_QUEUED;
  if (add_to_queue)
  {
    jam();
    c_queued_lcp_frag_ord.addLast(fragptr);
  }
  cnoOfFragsCheckpointed++;

  /* A prepare is already in progress; it will pick up the queue later. */
  if (lcpPtr.p->lcpPrepareState != LcpRecord::LCP_IDLE)
  {
    jam();
    return;
  }//if
  prepare_next_fragment_checkpoint(signal, false);
}//Dblqh::execLCP_FRAG_ORD()
19508 
handleFirstFragment(Signal * signal)19509 void Dblqh::handleFirstFragment(Signal *signal)
19510 {
19511   if (lcpPtr.p->firstFragmentFlag)
19512   {
19513     jam();
19514     LcpFragOrd *ord= (LcpFragOrd*)signal->getDataPtrSend();
19515     lcpPtr.p->firstFragmentFlag= false;
19516 
19517     if (!isNdbMtLqh())
19518     {
19519       /**
19520        * First fragment mean that last LCP is complete :-)
19521        */
19522       jam();
19523       *ord = lcpPtr.p->currentPrepareFragment.lcpFragOrd;
19524       EXECUTE_DIRECT_MT(TSMAN, GSN_LCP_FRAG_ORD,
19525                         signal, signal->length(), 0);
19526       jamEntry();
19527     }
19528     else
19529     {
19530       /**
19531        * Handle by LqhProxy
19532        */
19533     }
19534   }
19535 }
19536 
execLCP_PREPARE_REF(Signal * signal)19537 void Dblqh::execLCP_PREPARE_REF(Signal* signal)
19538 {
19539   jamEntry();
19540 
19541   LcpPrepareRef* ref= (LcpPrepareRef*)signal->getDataPtr();
19542 
19543   lcpPtr.i = ref->senderData;
19544   ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
19545   ndbrequire(lcpPtr.p->lcpPrepareState == LcpRecord::LCP_PREPARING);
19546 
19547   fragptr.i = lcpPtr.p->currentPrepareFragment.fragPtrI;
19548   c_fragment_pool.getPtr(fragptr);
19549 
19550   ndbrequire(ref->tableId == fragptr.p->tabRef);
19551   ndbrequire(ref->fragmentId == fragptr.p->fragId);
19552 
19553   tabptr.i = ref->tableId;
19554   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
19555 
19556   ndbrequire(refToMain(signal->getSendersBlockRef()) == BACKUP);
19557 
19558   /**
19559    * Only table no longer present is acceptable - anything
19560    * else is a hard error.
19561    * This sometimes manifests as error 785 - 'Schema object is busy with another...'
19562    * which we treat in the same way. This happens when the table is dropping when
19563    * we ask for the table information. So both are symptoms of a table which is
19564    * being dropped or already been dropped.
19565    */
19566   if (ref->errorCode != GetTabInfoRef::TableNotDefined &&
19567       ref->errorCode != DropTableRef::ActiveSchemaTrans)
19568   {
19569     g_eventLogger->critical("Fatal : LCP_PREPARE_REF t%uf%u errorCode %u",
19570                             ref->tableId,
19571                             ref->fragmentId,
19572                             ref->errorCode);
19573     ndbabort();
19574     return;
19575   };
19576   ndbrequire(c_localLcpId == 0);
19577 
19578   handleFirstFragment(signal);
19579   /* Carry on with the next table... */
19580   lcpPtr.p->lcpPrepareState = LcpRecord::LCP_COMPLETED;
19581   if (lcpPtr.p->lcpRunState == LcpRecord::LCP_IDLE ||
19582       lcpPtr.p->lcpRunState == LcpRecord::LCP_COMPLETED)
19583   {
19584     jam();
19585     /**
19586      * Our LCP prepare was the only outstanding LCP action. So currently
19587      * no LCP to watch. We will stop it, if there are waiting fragments to
19588      * prepare for LCP then we will start watchdog again.
19589      */
19590     stopLcpFragWatchdog();
19591     lcpPtr.p->lcpRunState = LcpRecord::LCP_IDLE;
19592   }
19593   completed_fragment_checkpoint(signal, lcpPtr.p->currentPrepareFragment);
19594   prepare_next_fragment_checkpoint(signal, true);
19595 }
19596 
execLCP_PREPARE_CONF(Signal * signal)19597 void Dblqh::execLCP_PREPARE_CONF(Signal* signal)
19598 {
19599   jamEntry();
19600 
19601   LcpPrepareConf* conf= (LcpPrepareConf*)signal->getDataPtr();
19602 
19603   ndbrequire(refToMain(signal->getSendersBlockRef()) == BACKUP);
19604 
19605   lcpPtr.i = conf->senderData;
19606   ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
19607   ndbrequire(lcpPtr.p->lcpPrepareState == LcpRecord::LCP_PREPARING);
19608 
19609   fragptr.i = lcpPtr.p->currentPrepareFragment.fragPtrI;
19610   c_fragment_pool.getPtr(fragptr);
19611 
19612   ndbrequire(conf->tableId == fragptr.p->tabRef);
19613   ndbrequire(conf->fragmentId == fragptr.p->fragId);
19614 
19615   handleFirstFragment(signal);
19616   lcpPtr.p->lcpPrepareState = LcpRecord::LCP_PREPARED;
19617   if (lcpPtr.p->lcpRunState == LcpRecord::LCP_COMPLETED ||
19618       lcpPtr.p->lcpRunState == LcpRecord::LCP_IDLE)
19619   {
19620     /**
19621      * No fragment was currently performing checkpoint, we can start
19622      * immediately, in most cases we will start when the current
19623      * fragment checkpoint is completed.
19624      * We can also start preparing the next fragment immediately.
19625      */
19626     jam();
19627     lcpPtr.p->currentRunFragment = lcpPtr.p->currentPrepareFragment;
19628     perform_fragment_checkpoint(signal);
19629     prepare_next_fragment_checkpoint(signal, false);
19630   }
19631   ndbrequire(lcpPtr.p->lcpRunState == LcpRecord::LCP_CHECKPOINTING);
19632 }
19633 
#ifdef NDB_DEBUG_FULL
/**
 * Debug-only helper, compiled in with NDB_DEBUG_FULL.
 *
 * Keeps a vector of signal records, each tagged as either saved or
 * sent. The member functions are defined outside this part of the
 * file; presumably they record signals so an LCP signal sequence can
 * be replayed later - TODO confirm against their definitions.
 */
static struct TraceLCP {
  /* Same parameter list as the block's sendSignal(). */
  void sendSignal(Uint32 ref, Uint32 gsn, Signal* signal,
		  Uint32 len, Uint32 prio);
  void save(Signal*);
  void restore(SimulatedBlock&, Signal* sig);
  /* One recorded signal: kind of record, header and payload words. */
  struct Sig {
    enum {
      Sig_save = 0,
      Sig_send = 1
    } type;
    SignalHeader header;
    Uint32 theData[25];
  };
  Vector<Sig> m_signals;
} g_trace_lcp;
/* Explicit instantiation of the Vector template for the record type. */
template class Vector<TraceLCP::Sig>;
#endif
19652 
perform_fragment_checkpoint(Signal * signal)19653 void Dblqh::perform_fragment_checkpoint(Signal *signal)
19654 {
19655   lcpPtr.p->lcpRunState = LcpRecord::LCP_CHECKPOINTING;
19656 
19657   fragptr.i = lcpPtr.p->currentRunFragment.fragPtrI;
19658   c_fragment_pool.getPtr(fragptr);
19659 
19660   /* ----------------------------------------------------------------------
19661    *    UPDATE THE MAX_GCI_IN_LCP AND MAX_GCI_COMPLETED_IN_LCP NOW BEFORE
19662    *    ACTIVATING THE FRAGMENT AGAIN.
19663    * --------------------------------------------------------------------- */
19664   ndbrequire(lcpPtr.p->currentRunFragment.lcpFragOrd.lcpNo < MAX_LCP_STORED);
19665 
19666   BackupFragmentReq* req= (BackupFragmentReq*)signal->getDataPtr();
19667   req->tableId = lcpPtr.p->currentRunFragment.lcpFragOrd.tableId;
19668   req->fragmentNo = 0;
19669   req->backupPtr = m_backup_ptr;
19670   req->backupId = lcpPtr.p->currentRunFragment.lcpFragOrd.lcpId;
19671   req->count = 0;
19672   req->senderRef = reference();
19673 
19674 #ifdef NDB_DEBUG_FULL
19675   if(ERROR_INSERTED(5904))
19676   {
19677   g_trace_lcp.sendSignal(BACKUP_REF, GSN_BACKUP_FRAGMENT_REQ, signal,
19678 			   BackupFragmentReq::SignalLength, JBA);
19679   }
19680   else
19681 #endif
19682   {
19683     if (ERROR_INSERTED(5044) &&
19684        (fragptr.p->tabRef == c_error_insert_table_id) &&
19685         fragptr.p->fragId) // Not first frag
19686     {
19687       /**
19688        * Force CRASH_INSERTION in 10s
19689        */
19690       ndbout_c("table: %d frag: %d", fragptr.p->tabRef, fragptr.p->fragId);
19691       SET_ERROR_INSERT_VALUE(5027);
19692       sendSignalWithDelay(reference(), GSN_START_RECREQ, signal, 10000, 1);
19693     }
19694     {
19695       BlockReference backupRef = calcInstanceBlockRef(BACKUP);
19696       sendSignal(backupRef, GSN_BACKUP_FRAGMENT_REQ, signal,
19697                  BackupFragmentReq::SignalLength, JBA);
19698     }
19699   }
19700 }
19701 
execBACKUP_FRAGMENT_REF(Signal * signal)19702 void Dblqh::execBACKUP_FRAGMENT_REF(Signal* signal)
19703 {
19704   BackupFragmentRef* ref= (BackupFragmentRef*)signal->getDataPtr();
19705 
19706   if (ref->errorCode != GetTabInfoRef::TableNotDefined &&
19707       ref->errorCode != DropTableRef::ActiveSchemaTrans)
19708   {
19709     jam();
19710     BackupFragmentRef *ref= (BackupFragmentRef*)signal->getDataPtr();
19711     char buf[100];
19712     BaseString::snprintf(buf,sizeof(buf),
19713                          "Unable to store fragment during LCP. NDBFS Error: %u",
19714                          ref->errorCode);
19715 
19716     progError(__LINE__,
19717               (ref->errorCode & FsRef::FS_ERR_BIT)?
19718               NDBD_EXIT_AFS_UNKNOWN
19719               : ref->errorCode,
19720               buf);
19721   }
19722   /**
19723    * Handle dropped tables in the middle of a multi-file fragment LCP.
19724    */
19725   jam();
19726   ndbrequire(c_localLcpId == 0);
19727   lcpPtr.i = 0;
19728   ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
19729 
19730   Uint32 backupId = ref->backupId;
19731   Uint32 backupPtr = ref->backupPtr;
19732   BackupFragmentConf* conf= (BackupFragmentConf*)signal->getDataPtrSend();
19733   conf->backupId = backupId;
19734   conf->backupPtr = backupPtr;
19735   conf->tableId = lcpPtr.p->currentRunFragment.lcpFragOrd.tableId;
19736   conf->fragmentNo = 0;
19737   conf->noOfRecordsLow = 0;
19738   conf->noOfRecordsHigh = 0;
19739   conf->noOfBytesLow = 0;
19740   conf->noOfBytesHigh = 0;
19741   execBACKUP_FRAGMENT_CONF(signal);
19742 }
19743 
19744 bool
is_disk_columns_in_table(Uint32 tableId)19745 Dblqh::is_disk_columns_in_table(Uint32 tableId)
19746 {
19747   return c_tup->is_disk_columns_in_table(tableId);
19748 }
19749 
19750 Uint32
get_current_local_lcp_id(void)19751 Dblqh::get_current_local_lcp_id(void)
19752 {
19753   return m_curr_local_lcp_id;
19754 }
19755 
19756 void
get_lcp_frag_stats(Uint64 & row_count,Uint64 & prev_row_count,Uint64 & row_change_count,Uint64 & memory_used_in_bytes,Uint32 & max_page_cnt)19757 Dblqh::get_lcp_frag_stats(Uint64 & row_count,
19758                           Uint64 & prev_row_count,
19759                           Uint64 & row_change_count,
19760                           Uint64 & memory_used_in_bytes,
19761                           Uint32 & max_page_cnt)
19762 {
19763   /**
19764    * Now the LCP is actually starting, we set the maxGciInLcp and
19765    * maxGciCompletedInLcp at this point and we get the row count
19766    * change row counts to calculate various values for the LCP.
19767    */
19768   lcpPtr.i = 0;
19769   ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
19770   ndbrequire(lcpPtr.p->lcpRunState == LcpRecord::LCP_CHECKPOINTING);
19771   fragptr.i = lcpPtr.p->currentRunFragment.fragPtrI;
19772   c_fragment_pool.getPtr(fragptr);
19773   fragptr.p->maxGciInLcp = fragptr.p->newestGci;
19774   /**
19775    * Calculating MaxGciCompleted is straightforward when we are
19776    * performing a fragment LCP during normal operation and it is
19777    * also straightforward when performed during system restart.
19778    * In both those cases it is simply set to cnewestCompletedGci.
19779    *
19780    * However in node restarts there are different situations to cater
19781    * for.
19782    * 1) If the copy fragment haven't been completed yet then it should
19783    *    set to the GCI we were able to restore in this node.
19784    *    However if we were restoring an LCP that was created during a
19785    *    node restart whereafter the node crashed before completing an
19786    *    LCP where REDO log was active, in that case the MaxGciCompleted
19787    *    is the same as the one we restored.
19788    *
19789    *    We need the following information.
19790    *    GCI we restored from the REDO log (crestartNewestGci)
19791    *    MaxGciCompleted of this fragment when restored
19792    *      (fragptr.p->m_completed_gci)
19793    *    We set it to the maximum of those two.
19794    *
19795    * 2) If the copy fragment have been completed when we come here to
19796    *    start the fragment LCP in that case we can also set
19797    *    MaxGciCompleted to cnewestCompletedGci at the start of the
19798    *    fragment LCP.
19799    *
19800    * 3) If we find that RESTORE restored a newer version than
19801    *    cnewestCompletedGci (can happen when RESTORE used an LCP
19802    *    created during copy phase of restart) than we use this
19803    *    GCI rather than the cnewestCompletedGci that represents
19804    *    the GCI that DIH supposed we could restore (in reality
19805    *    we are only able to restore using the other live node
19806    *    in this state).
19807    */
19808   if ((!fragptr.p->m_copy_complete_flag) &&
19809       (cstartType == NodeState::ST_NODE_RESTART ||
19810        cstartType == NodeState::ST_INITIAL_NODE_RESTART))
19811   {
19812     if (crestartNewestGci > fragptr.p->m_completed_gci)
19813     {
19814       jam();
19815       fragptr.p->maxGciCompletedInLcp = crestartNewestGci;
19816     }
19817     else
19818     {
19819       jam();
19820       fragptr.p->maxGciCompletedInLcp = fragptr.p->m_completed_gci;
19821     }
19822   }
19823   else if (cstartType == NodeState::ST_NODE_RESTART &&
19824            fragptr.p->m_completed_gci > cnewestCompletedGci)
19825   {
19826     jam();
19827     ndbrequire(c_local_sysfile.m_node_restorable_on_its_own ==
19828                ReadLocalSysfileReq::NODE_NOT_RESTORABLE_ON_ITS_OWN);
19829     fragptr.p->maxGciCompletedInLcp = fragptr.p->m_completed_gci;
19830   }
19831   else
19832   {
19833     jam();
19834     fragptr.p->maxGciCompletedInLcp = cnewestCompletedGci;
19835   }
19836   c_tup->get_lcp_frag_stats(fragptr.p->tupFragptr,
19837                             fragptr.p->newestGci,
19838                             max_page_cnt,
19839                             row_count,
19840                             prev_row_count,
19841                             row_change_count,
19842                             memory_used_in_bytes,
19843                             true);
19844 }
19845 
19846 Uint32
get_lcp_newest_gci(void)19847 Dblqh::get_lcp_newest_gci(void)
19848 {
19849   lcpPtr.i = 0;
19850   ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
19851   ndbrequire(lcpPtr.p->lcpRunState == LcpRecord::LCP_CHECKPOINTING);
19852   fragptr.i = lcpPtr.p->currentRunFragment.fragPtrI;
19853   c_fragment_pool.getPtr(fragptr);
19854   return fragptr.p->newestGci;
19855 }
19856 
19857 void
lcp_complete_scan(Uint32 & newestGci)19858 Dblqh::lcp_complete_scan(Uint32 & newestGci)
19859 {
19860   lcpPtr.i = 0;
19861   ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
19862   ndbrequire(lcpPtr.p->lcpRunState == LcpRecord::LCP_CHECKPOINTING);
19863   fragptr.i = lcpPtr.p->currentRunFragment.fragPtrI;
19864   c_fragment_pool.getPtr(fragptr);
19865   /**
19866    * Update maxGciInLcp after scan has been performed
19867    */
19868 #if defined VM_TRACE || defined ERROR_INSERT
19869   if (fragptr.p->newestGci != fragptr.p->maxGciInLcp)
19870   {
19871     DEB_LCP(("tab(%u,%u) increasing maxGciInLcp from %u to %u",
19872              fragptr.p->tabRef,
19873              fragptr.p->fragId,
19874              fragptr.p->maxGciInLcp, fragptr.p->newestGci));
19875   }
19876 #endif
19877   newestGci = fragptr.p->newestGci;
19878   fragptr.p->maxGciInLcp = fragptr.p->newestGci;
19879   if (fragptr.p->newestGci > c_max_gci_in_lcp)
19880   {
19881     jam();
19882     c_max_gci_in_lcp = fragptr.p->newestGci;
19883     DEB_LCP(("(%u)New c_max_gci_in_lcp = %u",
19884              instance(),
19885              c_max_gci_in_lcp));
19886   }
19887   DEB_LCP(("(%u)complete_scan: newestGci = %u, tab(%u,%u)",
19888            instance(),
19889            newestGci,
19890            fragptr.p->tabRef,
19891            fragptr.p->fragId));
19892 }
19893 
19894 void
lcp_max_completed_gci(Uint32 & completedGci,Uint32 max_gci_written,Uint32 restorable_gci)19895 Dblqh::lcp_max_completed_gci(Uint32 & completedGci,
19896                              Uint32 max_gci_written,
19897                              Uint32 restorable_gci)
19898 {
19899   lcpPtr.i = 0;
19900   ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
19901   ndbrequire(lcpPtr.p->lcpRunState == LcpRecord::LCP_CHECKPOINTING);
19902   fragptr.i = lcpPtr.p->currentRunFragment.fragPtrI;
19903   c_fragment_pool.getPtr(fragptr);
19904 
19905   if (max_gci_written <= restorable_gci &&
19906       fragptr.p->maxGciCompletedInLcp > restorable_gci)
19907   {
19908     jam();
19909     /**
19910      * In this case we haven't written any transactions in the LCP
19911      * that isn't restorable at this point in time. So the LCP
19912      * is already restorable. We will only record a
19913      * MaxGciCompleted that is at most the restorable GCI.
19914      *
19915      * The only repercussion of this decision is that we might need
19916      * to execute one extra GCI in the REDO log for a fragment that
19917      * we know won't have any writes there. So should be of no
19918      * concern at all.
19919      *
19920      * It is important to record the changed maxGciCompletedInLcp
19921      * to ensure that DIH doesn't record a higher MaxGciCompleted
19922      * than we record in the local files.
19923      *
19924      * This also simplifies the recovery.
19925      */
19926     fragptr.p->maxGciCompletedInLcp = restorable_gci;
19927   }
19928   if (fragptr.p->maxGciCompletedInLcp < c_keep_gci_for_lcp)
19929   {
19930     jam();
19931     /**
19932      * maxGciCompletedInLcp can never be smaller than the restorable GCI
19933      * at the time when we start the LCP.
19934      */
19935     fragptr.p->maxGciCompletedInLcp = c_keep_gci_for_lcp;
19936   }
19937 
19938   completedGci = fragptr.p->maxGciCompletedInLcp;
19939   DEB_LCP(("(%u)maxGciCompletedInLcp = %u, tab(%u,%u)",
19940            instance(),
19941            completedGci,
19942            fragptr.p->tabRef,
19943            fragptr.p->fragId));
19944   set_min_keep_gci(fragptr.p->maxGciCompletedInLcp);
19945 }
19946 
execBACKUP_FRAGMENT_CONF(Signal * signal)19947 void Dblqh::execBACKUP_FRAGMENT_CONF(Signal* signal)
19948 {
19949   jamEntry();
19950 
19951   if (ERROR_INSERTED(5073))
19952   {
19953     ndbout_c("Delaying BACKUP_FRAGMENT_CONF");
19954     sendSignalWithDelay(reference(), GSN_BACKUP_FRAGMENT_CONF, signal, 500,
19955                         signal->getLength());
19956     return;
19957   }
19958 
19959   BackupFragmentConf* conf= (BackupFragmentConf*)signal->getDataPtr();
19960   Uint64 noOfRecordsLow = conf->noOfRecordsLow;
19961   Uint64 noOfRecordsHigh = conf->noOfRecordsHigh;
19962   Uint64 noOfRecords = (noOfRecordsHigh << 32) + noOfRecordsLow;
19963 
19964   Uint64 noOfBytesLow = conf->noOfBytesLow;
19965   Uint64 noOfBytesHigh = conf->noOfBytesHigh;
19966   Uint64 noOfBytes = (noOfBytesHigh << 32) + noOfBytesLow;
19967 
19968   lcpPtr.i = 0;
19969   ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
19970   ndbrequire(lcpPtr.p->lcpRunState == LcpRecord::LCP_CHECKPOINTING);
19971   lcpPtr.p->lcpRunState = LcpRecord::LCP_COMPLETED;
19972   lcpPtr.p->m_no_of_records = noOfRecords;
19973   lcpPtr.p->m_no_of_bytes = noOfBytes;
19974 
19975   /* ------------------------------------------------------------------------
19976    *   THE LOCAL CHECKPOINT HAS BEEN COMPLETED. IT IS NOW TIME TO START
19977    *   A LOCAL CHECKPOINT ON THE NEXT FRAGMENT OR COMPLETE THIS LCP ROUND.
19978    * ------------------------------------------------------------------------
19979    *   WE START BY SENDING LCP_REPORT TO DIH TO REPORT THE COMPLETED LCP.
19980    *   TO CATER FOR NODE CRASHES WE SEND IT IN PARALLEL TO ALL NODES.
19981    * ----------------------------------------------------------------------- */
19982 
19983   completed_fragment_checkpoint(signal, lcpPtr.p->currentRunFragment);
19984   handleLCPSurfacing(signal);
19985 
19986   if (lcpPtr.p->lcpPrepareState == LcpRecord::LCP_PREPARED)
19987   {
19988     /**
19989      * We have completed a fragment checkpoint. We can start the next
19990      * fragment checkpoint which is already prepared and ready.
19991      *
19992      * After that we will start preparing the next fragment for
19993      * checkpointing.
19994      */
19995     jam();
19996     lcpPtr.p->currentRunFragment = lcpPtr.p->currentPrepareFragment;
19997     perform_fragment_checkpoint(signal);
19998     prepare_next_fragment_checkpoint(signal, false);
19999     return;
20000   }
20001   else if (lcpPtr.p->lcpPrepareState == LcpRecord::LCP_PREPARING)
20002   {
20003     /**
20004      * We completed the fragment checkpointing before the prepare of the
20005      * next was done. We will not do anything here since we will wait for
20006      * the prepare to complete and then new action will be taken.
20007      */
20008     jam();
20009     return;
20010   }
20011   jam();
20012   ndbrequire(lcpPtr.p->lcpPrepareState == LcpRecord::LCP_IDLE);
20013 
20014   stopLcpFragWatchdog();
20015 
20016   /**
20017    * No new fragment had even started to be prepared. This can only mean
20018    * that this checkpoint have come to an end. Or at least the queue has
20019    * come to an end. We check if we have received the last fragment and
20020    * if so we complete the checkpoint. Otherwise we simply wait for
20021    * more orders to checkpoint fragments.
20022    */
20023   lcpPtr.p->lcpRunState = LcpRecord::LCP_IDLE;
20024   if (lcpPtr.p->lastFragmentFlag)
20025   {
20026     jam();
20027     /* ----------------------------------------------------------------------
20028      *       NOW THE COMPLETE LOCAL CHECKPOINT ROUND IS COMPLETED.
20029      * --------------------------------------------------------------------- */
20030     completeLcpRoundLab(signal, lcpPtr.p->currentRunFragment.lcpFragOrd.lcpId);
20031     return;
20032   }//if
20033   return;
20034 }//Dblqh::execBACKUP_FRAGMENT_CONF()
20035 
20036 void
sendLCP_FRAG_REP(Signal * signal,const LcpRecord::FragOrd & fragOrd,const Fragrecord * fragPtrP) const20037 Dblqh::sendLCP_FRAG_REP(Signal * signal,
20038 			const LcpRecord::FragOrd & fragOrd,
20039                         const Fragrecord * fragPtrP) const
20040 {
20041   ndbrequire(fragOrd.lcpFragOrd.lcpNo < MAX_LCP_STORED);
20042   LcpFragRep * const lcpReport = (LcpFragRep *)&signal->theData[0];
20043   lcpReport->nodeId = cownNodeid;
20044   lcpReport->lcpId = fragOrd.lcpFragOrd.lcpId;
20045   lcpReport->lcpNo = fragOrd.lcpFragOrd.lcpNo;
20046   lcpReport->tableId = fragOrd.lcpFragOrd.tableId;
20047   lcpReport->fragId = fragOrd.lcpFragOrd.fragmentId;
20048   lcpReport->maxGciCompleted = fragPtrP->maxGciCompletedInLcp;
20049   lcpReport->maxGciStarted = fragPtrP->maxGciInLcp;
20050 
20051   Uint32 ref = DBDIH_REF;
20052   if (isNdbMtLqh())
20053   {
20054     jam();
20055     ref = DBLQH_REF;
20056   }
20057   lcpReport->nodeId = LcpFragRep::BROADCAST_REQ;
20058   sendSignal(ref, GSN_LCP_FRAG_REP, signal,
20059              LcpFragRep::SignalLength, JBA);
20060 }
20061 
20062 void
completed_fragment_checkpoint(Signal * signal,const LcpRecord::FragOrd & fragOrd)20063 Dblqh::completed_fragment_checkpoint(Signal *signal,
20064                                      const LcpRecord::FragOrd & fragOrd)
20065 {
20066   /* ------------------------------------------------------------------------
20067    *       UPDATE THE LATEST LOCAL CHECKPOINT COMPLETED ON FRAGMENT.
20068    *       UPDATE THE LCP_ID OF THIS CHECKPOINT.
20069    *       REMOVE THE LINK BETWEEN THE FRAGMENT RECORD AND THE LCP RECORD.
20070    * ----------------------------------------------------------------------- */
20071   /**
20072    * Send rep when fragment is done + unblocked
20073    */
20074   FragrecordPtr curr_fragptr;
20075   curr_fragptr.i = fragOrd.fragPtrI;
20076   c_fragment_pool.getPtr(curr_fragptr);
20077   if (curr_fragptr.p->m_create_table_flag_lcp_frag_ord)
20078   {
20079     /**
20080      * At this point we have accomplished the desired action of running an
20081      * LCP to ensure that we can deduce how far back we need to UNDO for
20082      * this table.
20083      *
20084      * We set state to LCP_EXECUTED_BY_CREATE_TABLE to ensure that we know
20085      * that an LCP have been executed this LCP. When an LCP_FRAG_ORD is
20086      * received we will send LCP_FRAG_REP immediately if it is part of
20087      * the same LCP.
20088      *
20089      * No need to send LCP_FRAG_REP since no LCP_FRAG_ORD was requested in
20090      * this case.
20091      */
20092     jam();
20093     curr_fragptr.p->lcp_frag_ord_state =
20094       Fragrecord::LCP_EXECUTED_BY_CREATE_TABLE;
20095     curr_fragptr.p->m_create_table_flag_lcp_frag_ord = false;
20096     DEB_EARLY_LCP(("(%u)LCP executed for tab(%u,%u), flag cleared",
20097                    instance(),
20098                    curr_fragptr.p->tabRef,
20099                    curr_fragptr.p->fragId));
20100   }
20101   else
20102   {
20103     jam();
20104     curr_fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_EXECUTED;
20105     if (c_localLcpId == 0)
20106     {
20107       /* Only need to send LCP_FRAG_REP during distributed LCP. */
20108       jam();
20109       sendLCP_FRAG_REP(signal, fragOrd, curr_fragptr.p);
20110     }
20111   }
20112 }
20113 
prepare_next_fragment_checkpoint(Signal * signal,bool complete_possible)20114 void Dblqh::prepare_next_fragment_checkpoint(Signal* signal,
20115                                              bool complete_possible)
20116 {
20117   FragrecordPtr curr_fragptr;
20118 
20119   if (lcpPtr.p->m_wait_early_lcp_synch)
20120   {
20121     jam();
20122     /**
20123      * We are waiting for a signal back from Backup block containing
20124      * a signal that all outstanding fragments that was put into LCP
20125      * queue by CREATE TABLE statements have been executed and have
20126      * completed.
20127      * We could receive the lastFragmentFlag while waiting and it is
20128      * important to not be in IDLE state when this arrives since this
20129      * will flag that the LCP is completed which is not the case here.
20130      */
20131     lcpPtr.p->lcpPrepareState = LcpRecord::LCP_PREPARING;
20132     return;
20133   }
20134   do
20135   {
20136     if (c_queued_lcp_frag_ord.isEmpty())
20137     {
20138       jam();
20139       lcpPtr.p->lcpPrepareState = LcpRecord::LCP_IDLE;
20140       break;
20141     }
20142     jam();
20143     /* ----------------------------------------------------------------------
20144      *  Remove first queued fragment from queue.
20145      *  Transfer the state from the queued to the active LCP.
20146      * --------------------------------------------------------------------- */
20147     c_queued_lcp_frag_ord.first(curr_fragptr);
20148     c_queued_lcp_frag_ord.removeFirst(curr_fragptr);
20149     curr_fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_EXECUTING;
20150     if (exec_prepare_next_fragment_checkpoint(signal, curr_fragptr))
20151     {
20152       jam();
20153       break;
20154     }
20155   } while (1);
20156   if (is_lcp_idle(lcpPtr.p))
20157   {
20158     /**
20159      * We have no queued fragments waiting to be prepared. We also
20160      * have no ongoing fragment executing its LCP. If we also received
20161      * the last fragment then we have completed this LCP.
20162      */
20163     jam();
20164     if (lcpPtr.p->lastFragmentFlag)
20165     {
20166       jam();
20167       ndbrequire(complete_possible);
20168       completeLcpRoundLab(signal, c_lcpId);
20169     }
20170   }
20171 }
20172 
20173 bool
exec_prepare_next_fragment_checkpoint(Signal * signal,FragrecordPtr curr_fragptr)20174 Dblqh::exec_prepare_next_fragment_checkpoint(Signal* signal,
20175                                              FragrecordPtr curr_fragptr)
20176 {
20177   TablerecPtr tabPtr;
20178   tabPtr.i = curr_fragptr.p->tabRef;
20179   ptrCheckGuard(tabPtr, ctabrecFileSize, tablerec);
20180   lcpPtr.p->currentPrepareFragment.fragPtrI = curr_fragptr.i;
20181   if (curr_fragptr.p->m_create_table_flag_lcp_frag_ord)
20182   {
20183     jam();
20184     curr_fragptr.p->lcp_frag_ord_lcp_id =
20185       lcpPtr.p->currentPrepareFragment.lcpFragOrd.lcpId;
20186     curr_fragptr.p->lcp_frag_ord_lcp_no = 0;
20187   }
20188   lcpPtr.p->currentPrepareFragment.lcpFragOrd.lcpNo =
20189        curr_fragptr.p->lcp_frag_ord_lcp_no;
20190   lcpPtr.p->currentPrepareFragment.lcpFragOrd.fragmentId = curr_fragptr.p->fragId;
20191   lcpPtr.p->currentPrepareFragment.lcpFragOrd.tableId = tabPtr.i;
20192 
20193   if (lcpPtr.p->m_early_lcps_need_synch)
20194   {
20195     if (curr_fragptr.p->m_create_table_insert_lcp)
20196     {
20197       jam();
20198       ndbrequire(lcpPtr.p->m_wait_early_lcp_synch == false);
20199     }
20200     else
20201     {
20202       jam();
20203       /**
20204        * One or more tables were created between two LCPs.
20205        * We need to ensure that those fragments complete their
20206        * LCP before any other fragment is allowed to complete.
20207        * This will ensure that the LCP that any table requires
20208        * the UNDO log to roll back to is sufficient for these
20209        * tables
20210        *
20211        * See comment in insert_new_fragments_into_lcp for more
20212        * details.
20213        */
20214       DEB_EARLY_LCP(("(%u)WAIT_LCP_IDLE_REQ: tab(%u,%u)",
20215                      instance(),
20216                      curr_fragptr.p->tabRef,
20217                      curr_fragptr.p->fragId));
20218       lcpPtr.p->lcpPrepareState = LcpRecord::LCP_PREPARING;
20219       lcpPtr.p->m_early_lcps_need_synch = false;
20220       lcpPtr.p->m_wait_early_lcp_synch = true;
20221       curr_fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_EXECUTING;
20222       BlockReference backupRef = calcInstanceBlockRef(BACKUP);
20223       signal->theData[0] = m_backup_ptr;
20224       sendSignal(backupRef, GSN_WAIT_LCP_IDLE_REQ,
20225                  signal, 1, JBB);
20226       return true;
20227     }
20228   }
20229 
20230   curr_fragptr.p->m_create_table_insert_lcp = false;
20231 
20232   if (unlikely(tabPtr.p->tableStatus != Tablerec::TABLE_DEFINED &&
20233                tabPtr.p->tableStatus != Tablerec::TABLE_READ_ONLY))
20234   {
20235     jam();
20236     /**
20237      * Fake that the fragment is done
20238      */
20239     ndbrequire(c_localLcpId == 0);
20240     completed_fragment_checkpoint(signal,
20241                                   lcpPtr.p->currentPrepareFragment);
20242     return false;
20243   }
20244 
20245   /**
20246    * We need to perform LCPs also of read-only tables since there might
20247    * have been changes to the table between now and when the table was
20248    * made read only.
20249    */
20250 
20251   lcpPtr.p->lcpPrepareState = LcpRecord::LCP_PREPARING;
20252   LcpPrepareReq* req= (LcpPrepareReq*)signal->getDataPtr();
20253   req->senderData = lcpPtr.i;
20254   req->senderRef = reference();
20255   req->lcpNo = lcpPtr.p->currentPrepareFragment.lcpFragOrd.lcpNo;
20256   req->tableId = lcpPtr.p->currentPrepareFragment.lcpFragOrd.tableId;
20257   req->fragmentId = lcpPtr.p->currentPrepareFragment.lcpFragOrd.fragmentId;
20258   req->lcpId = lcpPtr.p->currentPrepareFragment.lcpFragOrd.lcpId;
20259   req->backupPtr = m_backup_ptr;
20260   req->backupId = lcpPtr.p->currentPrepareFragment.lcpFragOrd.lcpId;
20261   req->createGci = curr_fragptr.p->createGci;
20262   req->localLcpId = m_curr_local_lcp_id;
20263   BlockReference backupRef = calcInstanceBlockRef(BACKUP);
20264   if (!ERROR_INSERTED(5053))
20265   {
20266     sendSignal(backupRef, GSN_LCP_PREPARE_REQ, signal,
20267 	       LcpPrepareReq::SignalLength, JBA);
20268   }
20269   else
20270   {
20271     sendSignalWithDelay(backupRef, GSN_LCP_PREPARE_REQ, signal,
20272 	                150, LcpPrepareReq::SignalLength);
20273   }
20274 
20275   /* Now start the LCP fragment watchdog */
20276   if (lcpPtr.p->lcpRunState == LcpRecord::LCP_IDLE)
20277   {
20278     jam();
20279     startLcpFragWatchdog(signal);
20280   }
20281   return true;
20282 }//Dblqh::prepare_next_fragment_checkpoint()
20283 
20284 void
execWAIT_LCP_IDLE_CONF(Signal * signal)20285 Dblqh::execWAIT_LCP_IDLE_CONF(Signal *signal)
20286 {
20287   FragrecordPtr curr_fragptr;
20288   lcpPtr.i = signal->theData[0];
20289   ptrCheckGuard(lcpPtr, clcpFileSize, lcpRecord);
20290   ndbrequire(lcpPtr.p->m_wait_early_lcp_synch);
20291   ndbrequire(!lcpPtr.p->m_early_lcps_need_synch);
20292 
20293   /**
20294    * No action is supposed to remove the fragment from the LCP list
20295    * while we are waiting for the previous fragments to complete. Only
20296    * completion of checkpoint for this fragment and an attempt to
20297    * prepare the fragment could cause this fragment to be removed and
20298    * none of those actions are possible while we are waiting for
20299    * WAIT_LCP_IDLE_CONF to return from the Backup block.
20300    *
20301    * It is also possible for Drop Table to remove if the state is
20302    * LCP_QUEUED, to ensure this won't happen the state of the fragment
20303    * was set to LCP_EXECUTING before starting the wait.
20304    *
20305    * The reason to be so careful with this protection against removal
20306    * is to avoid having to handle that the LCP completes while we are
20307    * waiting for the other fragments to complete. If that was possible
20308    * we would have to check for completion of LCP here as well. Now we
20309    * can skip that since it is safe that we are not arriving here in
20310    * that state.
20311    *
20312    * It is possible for the fragment LCP to be generated by a table
20313    * creation. This table should not be removed when the table is
20314    * in the state LCP_EXECUTING, so we rely on that this fragment is
20315    * still here when we return here. The fragment LCP can be dropped
20316    * however when we restart it again.
20317    */
20318   curr_fragptr.i = lcpPtr.p->currentPrepareFragment.fragPtrI;
20319   c_fragment_pool.getPtr(curr_fragptr);
20320   ndbrequire(curr_fragptr.p->lcp_frag_ord_state == Fragrecord::LCP_EXECUTING);
20321   c_queued_lcp_frag_ord.addFirst(curr_fragptr);
20322   curr_fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_QUEUED;
20323   ndbrequire(curr_fragptr.p->m_create_table_insert_lcp == false);
20324   lcpPtr.p->m_wait_early_lcp_synch = false;
20325   DEB_EARLY_LCP(("(%u) WAIT_LCP_IDLE_CONF, tab(%u,%u)",
20326                  instance(),
20327                  curr_fragptr.p->tabRef,
20328                  curr_fragptr.p->fragId));
20329   prepare_next_fragment_checkpoint(signal, true);
20330 }
20331 
20332 /* --------------------------------------------------------------------------
20333  *       THE LOCAL CHECKPOINT ROUND IS NOW COMPLETED. SEND COMPLETED MESSAGE
20334  *       TO THE MASTER DIH.
20335  * ------------------------------------------------------------------------- */
completeLcpRoundLab(Signal * signal,Uint32 lcpId)20336 void Dblqh::completeLcpRoundLab(Signal* signal, Uint32 lcpId)
20337 {
20338   if (c_fragments_in_lcp == 0)
20339   {
20340     jam();
20341     lcpPtr.i = 0;
20342     ptrAss(lcpPtr, lcpRecord);
20343     sendLCP_COMPLETE_REP(signal,
20344                          lcpPtr.p->currentPrepareFragment.lcpFragOrd.lcpId);
20345     return;
20346   }
20347   startLcpFragWatchdog(signal);
20348   DEB_LCP(("(%u)Start complete LCP %u", instance(), lcpId));
20349   clcpCompletedState = LCP_CLOSE_STARTED;
20350   EndLcpReq* req= (EndLcpReq*)signal->getDataPtr();
20351   req->senderData= lcpId;
20352   req->senderRef= reference();
20353   req->backupPtr= m_backup_ptr;
20354   req->backupId= lcpId;
20355   BlockReference backupRef = calcInstanceBlockRef(BACKUP);
20356   sendSignal(backupRef, GSN_END_LCPREQ, signal,
20357 	     EndLcpReq::SignalLength, JBA);
20358 }//Dblqh::completeLcpRoundLab()
20359 
execEND_LCPCONF(Signal * signal)20360 void Dblqh::execEND_LCPCONF(Signal* signal)
20361 {
20362   EndLcpConf *conf = (EndLcpConf*)signal->getDataPtr();
20363   jamEntry();
20364 
20365   ndbrequire(clcpCompletedState == LCP_CLOSE_STARTED);
20366   BlockReference backupRef = calcInstanceBlockRef(BACKUP);
20367   if (!isNdbMtLqh() &&
20368       conf->senderRef == backupRef)
20369   {
20370     /**
20371      * ndbd also needs to send to TSMAN (handled by Proxy block in ndbmtd).
20372      */
20373     jam();
20374     Uint32 lcpId = conf->senderData;
20375     EndLcpReq* req= (EndLcpReq*)signal->getDataPtr();
20376     req->senderData= lcpId;
20377     req->senderRef= reference();
20378     req->backupPtr= m_backup_ptr;
20379     req->backupId= lcpId;
20380     sendSignal(TSMAN_REF, GSN_END_LCPREQ, signal,
20381 	       EndLcpReq::SignalLength, JBA);
20382     return;
20383   }
20384   stopLcpFragWatchdog();
20385   lcpPtr.i = 0;
20386   ptrAss(lcpPtr, lcpRecord);
20387   sendLCP_COMPLETE_REP(signal,
20388                        lcpPtr.p->currentPrepareFragment.lcpFragOrd.lcpId);
20389   CRASH_INSERTION(5056);
20390 }//Dblqh::execEND_LCPCONF()
20391 
sendLCP_COMPLETE_REP(Signal * signal,Uint32 lcpId)20392 void Dblqh::sendLCP_COMPLETE_REP(Signal* signal, Uint32 lcpId)
20393 {
20394   ndbrequire((cnoOfNodes - 1) < (MAX_NDB_NODES - 1));
20395   /* ------------------------------------------------------------------------
20396    *       WE SEND COMP_LCP_ROUND TO ALL NODES TO PREPARE FOR NODE CRASHES.
20397    * ----------------------------------------------------------------------- */
20398   lcpPtr.i = 0;
20399   ptrAss(lcpPtr, lcpRecord);
20400 
20401   infoEvent("LDM(%u): Completed LCP, #frags = %u"
20402             " #records = %llu, #bytes = %llu",
20403             instance(),
20404             cnoOfFragsCheckpointed,
20405             lcpPtr.p->m_no_of_records,
20406             lcpPtr.p->m_no_of_bytes);
20407 
20408   lcpPtr.p->lastFragmentFlag = false;
20409   lcpPtr.p->firstFragmentFlag = false;
20410   lcpPtr.p->m_no_of_records = 0;
20411   lcpPtr.p->m_no_of_bytes = 0;
20412   cnoOfFragsCheckpointed = 0;
20413   clcpCompletedState = LCP_IDLE;
20414   c_fragments_in_lcp = 0;
20415 
20416   if (c_localLcpId != 0)
20417   {
20418     jam();
20419     /**
20420      * We have completed a local LCP, report it locally and continue
20421      * restart processing.
20422      */
20423     complete_local_lcp(signal);
20424     return;
20425   }
20426   /**
20427    * Before we report that we're done to cluster level we want to
20428    * wait until the GCI that makes the LCP completely done is safe.
20429    * After that we have also ensured that the UNDO and REDO log tails
20430    * have been cut. Only after this is done are we ready to report
20431    * back to DIH that the LCP is complete in this LDM.
20432    *
20433    * This coordination happens in NDBCNTR.
20434    */
20435   ndbrequire(c_keep_gci_for_lcp <= c_max_keep_gci_in_lcp ||
20436              c_num_fragments_created_since_restart == 0);
20437   LcpAllCompleteReq* req = (LcpAllCompleteReq*)signal->getDataPtrSend();
20438   req->senderRef = reference();
20439   req->lcpId = lcpId;
20440   req->maxGciInLcp = c_max_gci_in_lcp;
20441   req->maxKeepGci = c_max_keep_gci_in_lcp;
20442   sendSignal(NDBCNTR_REF, GSN_LCP_ALL_COMPLETE_REQ, signal,
20443              LcpAllCompleteReq::SignalLength, JBB);
20444 }
20445 
execLCP_ALL_COMPLETE_CONF(Signal * signal)20446 void Dblqh::execLCP_ALL_COMPLETE_CONF(Signal *signal)
20447 {
20448   lcpPtr.i = 0;
20449   ptrAss(lcpPtr, lcpRecord);
20450   Uint32 lcpId = signal->theData[0];
20451   LcpCompleteRep* rep = (LcpCompleteRep*)signal->getDataPtrSend();
20452   rep->nodeId = getOwnNodeId();
20453   rep->lcpId = lcpId;
20454   rep->blockNo = DBLQH;
20455 
20456   Uint32 ref = DBDIH_REF;
20457   if (isNdbMtLqh())
20458   {
20459     jam();
20460     ref = DBLQH_REF;
20461   }
20462   rep->nodeId = LcpFragRep::BROADCAST_REQ;
20463 
20464   sendSignal(ref, GSN_LCP_COMPLETE_REP, signal,
20465              LcpCompleteRep::SignalLength, JBA);
20466 
20467   if (cstartRecReq < SRR_FIRST_LCP_DONE)
20468   {
20469     jam();
20470     ndbrequire(cstartRecReq == SRR_REDO_COMPLETE);
20471     cstartRecReq = SRR_FIRST_LCP_DONE;
20472   }
20473   return;
20474 
20475 }//Dblqh::sendLCP_COMPLETE_REP()
20476 
20477 /* ------------------------------------------------------------------------- */
20478 /* -------               SET THE LOG TAIL IN THE LOG FILES           ------- */
20479 /*                                                                           */
20480 /*THIS SUBROUTINE HAVE BEEN BUGGY AND IS RATHER COMPLEX. IT IS IMPORTANT TO  */
20481 /*REMEMBER THAT WE SEARCH FROM THE TAIL UNTIL WE REACH THE HEAD (CURRENT).   */
20482 /*THE TAIL AND HEAD CAN BE ON THE SAME MBYTE. WE SEARCH UNTIL WE FIND A MBYTE*/
20483 /*THAT WE NEED TO KEEP. WE THEN SET THE TAIL TO BE THE PREVIOUS. IF WE DO    */
20484 /*NOT FIND A MBYTE THAT WE NEED TO KEEP UNTIL WE REACH THE HEAD THEN WE USE  */
20485 /*THE HEAD AS TAIL. FINALLY WE HAVE TO MOVE BACK THE TAIL TO ALSO INCLUDE    */
20486 /*ALL PREPARE RECORDS. THIS MEANS THAT LONG-LIVED TRANSACTIONS ARE DANGEROUS */
20487 /*FOR SHORT LOGS.                                                            */
20488 /* ------------------------------------------------------------------------- */
20489 
setLogTail(Signal * signal,Uint32 keepGci)20490 void Dblqh::setLogTail(Signal* signal, Uint32 keepGci)
20491 {
20492   LogPartRecordPtr sltLogPartPtr;
20493   LogFileRecordPtr sltLogFilePtr;
20494   UintR tsltMbyte;
20495   UintR tsltStartMbyte;
20496   UintR tsltIndex;
20497   UintR tsltFlag;
20498 
20499   for (sltLogPartPtr.i = 0; sltLogPartPtr.i < clogPartFileSize; sltLogPartPtr.i++) {
20500     jam();
20501     bool TchangeMB = false;
20502 retry:
20503     ptrAss(sltLogPartPtr, logPartRecord);
20504     findLogfile(signal, sltLogPartPtr.p->logTailFileNo,
20505                 sltLogPartPtr, &sltLogFilePtr);
20506 
20507     tsltMbyte = sltLogPartPtr.p->logTailMbyte;
20508     tsltStartMbyte = tsltMbyte;
20509     tsltFlag = ZFALSE;
20510     if (sltLogFilePtr.i == sltLogPartPtr.p->currentLogfile) {
20511 /* ------------------------------------------------------------------------- */
20512 /*THE LOG AND THE TAIL IS ALREADY IN THE SAME FILE.                          */
20513 /* ------------------------------------------------------------------------- */
20514       if (sltLogFilePtr.p->currentMbyte >= sltLogPartPtr.p->logTailMbyte) {
20515         jam();
20516 /* ------------------------------------------------------------------------- */
20517 /*THE CURRENT MBYTE IS AHEAD OF OR AT THE TAIL. THUS WE WILL ONLY LOOK FOR   */
20518 /*THE TAIL UNTIL WE REACH THE CURRENT MBYTE WHICH IS IN THIS LOG FILE.       */
20519 /*IF THE LOG TAIL IS AHEAD OF THE CURRENT MBYTE BUT IN THE SAME LOG FILE     */
20520 /*THEN WE HAVE TO SEARCH THROUGH ALL FILES BEFORE WE COME TO THE CURRENT     */
20521 /*MBYTE. WE ALWAYS STOP WHEN WE COME TO THE CURRENT MBYTE SINCE THE TAIL     */
20522 /*CAN NEVER BE BEFORE THE HEAD.                                              */
20523 /* ------------------------------------------------------------------------- */
20524         tsltFlag = ZTRUE;
20525       }//if
20526     }//if
20527 
20528 /* ------------------------------------------------------------------------- */
20529 /*NOW START SEARCHING FOR THE NEW TAIL, STARTING AT THE CURRENT TAIL AND     */
20530 /*PROCEEDING UNTIL WE FIND A MBYTE WHICH IS NEEDED TO KEEP OR UNTIL WE REACH */
20531 /*CURRENT MBYTE (THE HEAD).                                                  */
20532 /* ------------------------------------------------------------------------- */
20533   SLT_LOOP:
20534     for (tsltIndex = tsltStartMbyte;
20535 	 tsltIndex <= clogFileSize - 1;
20536 	 tsltIndex++) {
20537       if (sltLogFilePtr.p->logMaxGciStarted[tsltIndex] >= keepGci) {
20538 /* ------------------------------------------------------------------------- */
20539 /*WE ARE NOT ALLOWED TO STEP THE LOG ANY FURTHER AHEAD                       */
20540 /*SET THE NEW LOG TAIL AND CONTINUE WITH NEXT LOG PART.                      */
20541 /*THIS MBYTE IS NOT TO BE INCLUDED SO WE NEED TO STEP BACK ONE MBYTE.        */
20542 /* ------------------------------------------------------------------------- */
20543         /* Check keepGCI MB has a reasonable GCI value */
20544         ndbrequire(sltLogFilePtr.p->logMaxGciStarted[tsltIndex] != ((Uint32) -1));
20545         if (tsltIndex != 0) {
20546           jam();
20547           tsltMbyte = tsltIndex - 1;
20548         } else {
20549           jam();
20550 /* ------------------------------------------------------------------------- */
20551 /*STEPPING BACK INCLUDES ALSO STEPPING BACK TO THE PREVIOUS LOG FILE.        */
20552 /* ------------------------------------------------------------------------- */
20553           tsltMbyte = clogFileSize - 1;
20554           sltLogFilePtr.i = sltLogFilePtr.p->prevLogFile;
20555           ptrCheckGuard(sltLogFilePtr, clogFileFileSize, logFileRecord);
20556         }//if
20557         goto SLT_BREAK;
20558       } else {
20559         jam();
20560         if (tsltFlag == ZTRUE) {
20561 /* ------------------------------------------------------------------------- */
20562 /*WE ARE IN THE SAME FILE AS THE CURRENT MBYTE AND WE CAN REACH THE CURRENT  */
20563 /*MBYTE BEFORE WE REACH A NEW TAIL.                                          */
20564 /* ------------------------------------------------------------------------- */
20565           if (tsltIndex == sltLogFilePtr.p->currentMbyte) {
20566             jam();
20567 /* ------------------------------------------------------------------------- */
20568 /*THE TAIL OF THE LOG IS ACTUALLY WITHIN THE CURRENT MBYTE. THUS WE SET THE  */
20569 /*LOG TAIL TO BE THE CURRENT MBYTE.                                          */
20570 /* ------------------------------------------------------------------------- */
20571             tsltMbyte = sltLogFilePtr.p->currentMbyte;
20572             goto SLT_BREAK;
20573           }//if
20574         }//if
20575       }//if
20576     }//for
20577     sltLogFilePtr.i = sltLogFilePtr.p->nextLogFile;
20578     ptrCheckGuard(sltLogFilePtr, clogFileFileSize, logFileRecord);
20579     if (sltLogFilePtr.i == sltLogPartPtr.p->currentLogfile) {
20580       jam();
20581       tsltFlag = ZTRUE;
20582     }//if
20583     tsltStartMbyte = 0;
20584     goto SLT_LOOP;
20585   SLT_BREAK:
20586     jam();
20587     {
20588       UintR ToldTailFileNo = sltLogPartPtr.p->logTailFileNo;
20589       UintR ToldTailMByte = sltLogPartPtr.p->logTailMbyte;
20590 
20591 /* ------------------------------------------------------------------------- */
20592 /*SINCE LOG_MAX_GCI_STARTED ONLY KEEP TRACK OF COMMIT LOG RECORDS WE ALSO    */
20593 /*HAVE TO STEP BACK THE TAIL SO THAT WE INCLUDE ALL PREPARE RECORDS          */
20594 /*NEEDED FOR THOSE COMMIT RECORDS IN THIS MBYTE. THIS IS A RATHER            */
20595 /*CONSERVATIVE APPROACH BUT IT WORKS.                                        */
20596 /* ------------------------------------------------------------------------- */
20597       arrGuard(tsltMbyte, clogFileSize);
20598       sltLogPartPtr.p->logTailFileNo =
20599         sltLogFilePtr.p->logLastPrepRef[tsltMbyte] >> 16;
20600       sltLogPartPtr.p->logTailMbyte =
20601         sltLogFilePtr.p->logLastPrepRef[tsltMbyte] & 65535;
20602 
20603       if (DEBUG_REDO)
20604       {
20605         ndbout_c("part: %u setLogTail(gci: %u): file: %u mb: %u",
20606                  sltLogPartPtr.p->logPartNo,
20607                  keepGci,
20608                  sltLogPartPtr.p->logTailFileNo,
20609                  sltLogPartPtr.p->logTailMbyte);
20610       }
20611 
20612       bool tailmoved = !(ToldTailFileNo == sltLogPartPtr.p->logTailFileNo &&
20613                          ToldTailMByte == sltLogPartPtr.p->logTailMbyte);
20614 
20615       LogFileRecordPtr tmpfile;
20616       tmpfile.i = sltLogPartPtr.p->currentLogfile;
20617       ptrCheckGuard(tmpfile, clogFileFileSize, logFileRecord);
20618 
20619       LogPosition head = { tmpfile.p->fileNo, tmpfile.p->currentMbyte };
20620       LogPosition tail = { sltLogPartPtr.p->logTailFileNo,
20621                            sltLogPartPtr.p->logTailMbyte};
20622       Uint64 free_mb = free_log(head, tail, sltLogPartPtr.p->noLogFiles,
20623                            clogFileSize);
20624 
20625 #ifdef DEBUG_CUT_REDO
20626       {
20627         TcConnectionrecPtr tmp;
20628         tmp.i = sltLogPartPtr.p->firstLogTcrec;
20629         Uint32 fileNo = -1;
20630         Uint32 mbyte = -1;
20631         if (tmp.i != RNIL)
20632         {
20633           jam();
20634           ndbrequire(tcConnect_pool.getValidPtr(tmp));
20635           fileNo = tmp.p->logStartFileNo;
20636           mbyte = tmp.p->logStartPageNo >> ZTWOLOG_NO_PAGES_IN_MBYTE;
20637         }
20638         DEB_CUT_REDO(("(%u)Logpart: %u, gci: %u, tail(%u,%u), old_tail(%u,%u)"
20639                       ", tc_tail(%u,%u), mb: %llu",
20640                       instance(),
20641                       sltLogPartPtr.p->logPartNo,
20642                       keepGci,
20643                       sltLogPartPtr.p->logTailFileNo,
20644                      sltLogPartPtr.p->logTailMbyte,
20645                       ToldTailFileNo,
20646                       ToldTailMByte,
20647                       fileNo,
20648                       mbyte,
20649                       free_mb));
20650       }
20651 #endif
20652 
20653       if (free_mb <= c_free_mb_force_lcp_limit)
20654       {
20655         /**
20656          * Force a new LCP
20657          */
20658         force_lcp(signal);
20659       }
20660       Uint32 committed_mbytes = get_committed_mbytes(sltLogPartPtr.p);
20661       if (tailmoved &&
20662           (free_mb >
20663            (c_free_mb_tail_problem_limit + committed_mbytes)))
20664       {
20665         jam();
20666         update_log_problem(signal, sltLogPartPtr,
20667                            LogPartRecord::P_TAIL_PROBLEM, false);
20668       }
20669       else if (!tailmoved && free_mb <= c_free_mb_force_lcp_limit)
20670       {
20671         jam();
20672         /**
20673          * Tail didn't move...and we forced a new LCP
20674          *   This could be as currentMb, contains backreferences making it
20675          *   Check if changing mb forward will help situation
20676          */
20677         if ((free_mb + committed_mbytes) < 4)
20678         {
20679           /**
20680            * Less than 4 mb free, no point in trying to changeMbyte forward...
20681            */
20682           jam();
20683           goto next;
20684         }
20685 
20686         if (TchangeMB)
20687         {
20688           jam();
20689           /**
20690            * We already did move forward...
20691            */
20692           goto next;
20693         }
20694 
20695         TcConnectionrecPtr tmp;
20696         tmp.i = sltLogPartPtr.p->firstLogTcrec;
20697         if (tmp.i != RNIL)
20698         {
20699           jam();
20700           ndbrequire(tcConnect_pool.getValidPtr(tmp));
20701           Uint32 fileNo = tmp.p->logStartFileNo;
20702           Uint32 mbyte = tmp.p->logStartPageNo >> ZTWOLOG_NO_PAGES_IN_MBYTE;
20703 
20704           if (fileNo == sltLogPartPtr.p->logTailFileNo &&
20705               mbyte == sltLogPartPtr.p->logTailMbyte)
20706           {
20707             jam();
20708             /**
20709              * An uncommitted operation...still pending...
20710              *   with back-reference to tail...not much to do
20711              *   (theoretically we could rewrite log-entry here...
20712              *    but this is for future)
20713              * skip to next
20714              */
20715             goto next;
20716           }
20717         }
20718 
20719         {
20720           /**
20721            * Try forcing a changeMbyte
20722            */
20723           jam();
20724           logPartPtr = sltLogPartPtr;
20725           logFilePtr.i = logPartPtr.p->currentLogfile;
20726           ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
20727           logPagePtr.i = logFilePtr.p->currentLogpage;
20728           ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
20729           changeMbyte(signal);
20730           TchangeMB = true; // don't try this twice...
20731           goto retry;
20732         }
20733       }
20734     }
20735 next:
20736     (void)1;
20737   }//for
20738 }//Dblqh::setLogTail()
20739 
20740 /* ######################################################################### */
20741 /* #######                       GLOBAL CHECKPOINT MODULE            ####### */
20742 /*                                                                           */
20743 /* ######################################################################### */
20744 /*---------------------------------------------------------------------------*/
20745 /* THIS MODULE HELPS DIH IN DISCOVERING WHEN GLOBAL CHECKPOINTS ARE          */
20746 /* RECOVERABLE. IT HANDLES THE REQUEST GCP_SAVEREQ THAT REQUESTS LQH TO      */
20747 /* SAVE A PARTICULAR GLOBAL CHECKPOINT TO DISK AND RESPOND WHEN COMPLETED.   */
20748 /*---------------------------------------------------------------------------*/
20749 /* *************** */
20750 /*  GCP_SAVEREQ  > */
20751 /* *************** */
20752 
#if defined VM_TRACE || defined ERROR_INSERT
/**
 * Only compiled when VM_TRACE or ERROR_INSERT is defined.
 * Holds the GCI of the most recently received GCP_SAVEREQ (0 until the
 * first one arrives). execGCP_SAVEREQ uses it, when not running as
 * multi-threaded LQH, to require that each request either repeats the
 * previous GCI or advances it by exactly one.
 */
static Uint32 m_gcp_monitor = 0;
#endif
20756 
/**
 * Handle GCP_SAVEREQ: a request to save global checkpoint 'gci' to the
 * REDO log and answer the requester (dihBlockRef/dihPtr from the signal).
 *
 * Outcomes, in the order they are tested:
 *  - startLevel >= SL_STOPPING_4: reply GCP_SAVEREF(NodeShutdownInProgress).
 *  - cstartRecReq < SRR_REDO_COMPLETE: reply
 *    GCP_SAVEREF(NodeRestartInProgress).
 *  - gci equals the previously recorded cnewestCompletedGci: either resend
 *    GCP_SAVECONF (no GCP record in progress) or just redirect the pending
 *    reply to the new requester.
 *  - cstartRecReq < SRR_FIRST_LCP_DONE: remember the request in
 *    c_local_sysfile and reply with GCP_SAVEREF, directly or after a
 *    local sysfile write has been started.
 *  - otherwise: fill in GCP record 0 and either start writing the GCP
 *    (start_synch_gcp) or, on the first instance while cstartPhase != ZNIL,
 *    start a local sysfile write and return.
 *    NOTE(review): the continuation after that sysfile write is not visible
 *    in this function - presumably it resumes the GCP save; confirm.
 *
 * The sender must be on the own node (ndbrequire).
 */
void Dblqh::execGCP_SAVEREQ(Signal* signal)
{
  jamEntry();
  const GCPSaveReq * const saveReq = (GCPSaveReq *)&signal->theData[0];

  CRASH_INSERTION(5000);

  /* Error insert 5007: clear the insert and re-send this request to ourselves
   * with a delay instead of handling it now. */
  if (ERROR_INSERTED(5007)){
    CLEAR_ERROR_INSERT_VALUE;
    sendSignalWithDelay(cownref, GSN_GCP_SAVEREQ, signal, 10000,
			signal->length());
    return;
  }

  /* Copy the request fields out of the signal: every reply below is built
   * in the same signal->theData area and would overwrite them. */
  const Uint32 dihBlockRef = saveReq->dihBlockRef;
  const Uint32 dihPtr = saveReq->dihPtr;
  const Uint32 gci = saveReq->gci;

  ndbrequire(refToNode(signal->getSendersBlockRef()) == getOwnNodeId());

#if defined VM_TRACE || defined ERROR_INSERT
  /* Sanity check of the GCI sequence: same GCI again or exactly one higher. */
  if (!isNdbMtLqh()) { // wl4391_todo mt-safe
  ndbrequire(m_gcp_monitor == 0 ||
             (m_gcp_monitor == gci) ||
             (m_gcp_monitor + 1) == gci);
  }
  m_gcp_monitor = gci;
#endif

  if(getNodeState().startLevel >= NodeState::SL_STOPPING_4)
  {
    DEB_GCP(("(%u)SL_STOPPING_4: gci = %u", instance(), gci));
    GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
    saveRef->dihPtr = dihPtr;
    saveRef->nodeId = getOwnNodeId();
    saveRef->gci    = gci;
    saveRef->errorCode = GCPSaveRef::NodeShutdownInProgress;
    sendSignal(dihBlockRef, GSN_GCP_SAVEREF, signal,
	       GCPSaveRef::SignalLength, JBB);
    return;
  }

  /**
   * Cannot update cnewestCompletedGci during SL_STOPPING_4 since we
   * no longer participate in GCPs.
   */
  /* Keep the previous value: it is needed below to detect a repeated GCI. */
  Uint32 saveNewestCompletedGci = cnewestCompletedGci;
  cnewestCompletedGci = gci;

  if (cstartRecReq < SRR_REDO_COMPLETE)
  {
    DEB_GCP(("(%u)!SRR_REDO_COMPLETE: gci = %u", instance(), gci));
    /**
     * REDO running is not complete
     */
    GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
    saveRef->dihPtr = dihPtr;
    saveRef->nodeId = getOwnNodeId();
    saveRef->gci    = gci;
    saveRef->errorCode = GCPSaveRef::NodeRestartInProgress;
    sendSignal(dihBlockRef, GSN_GCP_SAVEREF, signal,
	       GCPSaveRef::SignalLength, JBB);
    return;
  }

  /* The requested GCI must never be lower than the previously recorded one. */
  ndbrequire(gci >= saveNewestCompletedGci);

  if (gci == saveNewestCompletedGci)
  {
/*---------------------------------------------------------------------------*/
/* GLOBAL CHECKPOINT HAVE ALREADY BEEN HANDLED. REQUEST MUST HAVE BEEN SENT  */
/* FROM NEW MASTER DIH.                                                      */
/*---------------------------------------------------------------------------*/
    DEB_GCP(("(%u)GCP already sent: gci = %u", instance(), gci));
    if (ccurrentGcprec == RNIL) {
      jam();
/*---------------------------------------------------------------------------*/
/* THIS INDICATES THAT WE HAVE ALREADY SENT GCP_SAVECONF TO PREVIOUS MASTER. */
/* WE SIMPLY SEND IT ALSO TO THE NEW MASTER.                                 */
/*---------------------------------------------------------------------------*/
      GCPSaveConf * const saveConf = (GCPSaveConf*)&signal->theData[0];
      saveConf->dihPtr = dihPtr;
      saveConf->nodeId = getOwnNodeId();
      saveConf->gci    = cnewestCompletedGci;
      sendSignal(dihBlockRef, GSN_GCP_SAVECONF, signal,
		 GCPSaveConf::SignalLength, JBA);
      return;
    }
    jam();
/*---------------------------------------------------------------------------*/
/* WE HAVE NOT YET SENT THE RESPONSE TO THE OLD MASTER. WE WILL SET THE NEW  */
/* RECEIVER OF THE RESPONSE AND THEN EXIT SINCE THE PROCESS IS ALREADY       */
/* STARTED.                                                                  */
/*---------------------------------------------------------------------------*/
    gcpPtr.i = ccurrentGcprec;
    ptrCheckGuard(gcpPtr, cgcprecFileSize, gcpRecord);
    gcpPtr.p->gcpUserptr = dihPtr;
    gcpPtr.p->gcpBlockref = dihBlockRef;
    ndbrequire(refToMain(gcpPtr.p->gcpBlockref) == DBDIH ||
               refToMain(gcpPtr.p->gcpBlockref) == DBLQH);
    return;
  }//if

  /* A new GCI: no GCP save may be in progress at this point. */
  ndbrequire(ccurrentGcprec == RNIL);
  cnewestCompletedGci = gci;
  if (gci > cnewestGci) {
    jam();
    cnewestGci = gci;
  }//if

  if(cstartRecReq < SRR_FIRST_LCP_DONE)
  {
    /**
     * First LCP has not been done
     */
    jam();
    DEB_GCP(("(%u)!SRR_FIRST_LCP_DONE: gci = %u", instance(), gci));
    /* Stash the request so that write_local_sysfile_gcp_complete_done can
     * build the GCP_SAVEREF reply from it. */
    c_local_sysfile.m_save_gci = gci;
    c_local_sysfile.m_dihPtr = dihPtr;
    c_local_sysfile.m_dihRef = dihBlockRef;
    c_send_gcp_saveref_needed = true;
    if ((m_node_restart_first_local_lcp_started ||
         m_first_distributed_lcp_started) &&
        is_first_instance())
    {
      jam();
      write_local_sysfile_gcp_complete(signal, gci - 1);
    }
    else
    {
      jam();
      /* No sysfile write is started here: send the GCP_SAVEREF right away. */
      write_local_sysfile_gcp_complete_done(signal);
    }
    return;
  }

  CRASH_INSERTION(5052);

#ifdef GCP_TIMER_HACK
  globalData.gcp_timer_save[0] = NdbTick_getCurrentTicks();
#endif

  if (cstartPhase == ZNIL)
  {
    jam();
    /**
     * The node have completed its start at least up to phase 50 which
     * means our node is fully restorable and we can treat this GCI
     * as restorable.
     *
     * After completing the restart LCP but before the node restart
     * is completed we won't send any writes to local sysfile, but
     * also we won't report the GCI as restorable just yet.
     * This will not have any major impact since after the restart LCP
     * is completed a very short time should pass before we get to
     * phase 9 where the LQH restart is fully completed and we know
     * that we are restorable again.
     */
    sendRESTORABLE_GCI_REP(signal, gci);
  }

  /* GCP record 0 is the record used for the GCP save now being started;
   * store where the eventual reply must go and which GCI it concerns. */
  ccurrentGcprec = 0;
  gcpPtr.i = ccurrentGcprec;
  ptrCheckGuard(gcpPtr, cgcprecFileSize, gcpRecord);

  gcpPtr.p->gcpBlockref = dihBlockRef;
  gcpPtr.p->gcpUserptr = dihPtr;
  gcpPtr.p->gcpId = gci;
  ndbrequire(refToMain(gcpPtr.p->gcpBlockref) == DBDIH ||
             refToMain(gcpPtr.p->gcpBlockref) == DBLQH);

  if (cstartPhase != ZNIL)
  {
    jam();
    if (is_first_instance())
    {
      if (c_start_phase_9_waiting)
      {
        jam();
        /**
         * We have reached Start phase 9 and no one is writing local sysfile
         * since we arrive here. Thus we will write restart completed into the
         * local sysfile before we flush the GCI into the REDO logs.
         */
        write_local_sysfile_restart_complete(signal);
      }
      else
      {
        jam();
        /**
         * We need to keep the local sysfile up to date with the
         * maximum restartable GCI until the restart is completed.
         * This GCI is not necessarily restartable, it is only a
         * maximum GCI that can be restarted. DIH decides what is
         * restartable. LQH keeps this information only to verify
         * that DIH is performing its action correctly.
         */
        write_local_sysfile_gcp_complete_late(signal, gci);
      }
      /* start_synch_gcp is not called on this path. */
      return;
    }
  }
  start_synch_gcp(signal);
}
20961 
start_synch_gcp(Signal * signal)20962 void Dblqh::start_synch_gcp(Signal *signal)
20963 {
20964   ccurrentGcprec = 0;
20965   gcpPtr.i = ccurrentGcprec;
20966   ptrCheckGuard(gcpPtr, cgcprecFileSize, gcpRecord);
20967   bool tlogActive = false;
20968   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
20969     ptrAss(logPartPtr, logPartRecord);
20970     if (logPartPtr.p->logPartState == LogPartRecord::ACTIVE) {
20971       jam();
20972       logPartPtr.p->waitWriteGciLog = LogPartRecord::WWGL_TRUE;
20973       tlogActive = true;
20974       if (logPartPtr.p->LogLqhKeyReqSent == ZFALSE)
20975       {
20976         jam();
20977         logPartPtr.p->LogLqhKeyReqSent = ZTRUE;
20978         signal->theData[0] = ZLOG_LQHKEYREQ;
20979         signal->theData[1] = logPartPtr.i;
20980         sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
20981       }
20982     } else {
20983       jam();
20984       logPartPtr.p->waitWriteGciLog = LogPartRecord::WWGL_FALSE;
20985       logFilePtr.i = logPartPtr.p->currentLogfile;
20986       ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
20987       logPagePtr.i = logFilePtr.p->currentLogpage;
20988       ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
20989       writeCompletedGciLog(signal);
20990     }//if
20991   }//for
20992   if (tlogActive == true) {
20993     jam();
20994     return;
20995   }//if
20996   initGcpRecLab(signal);
20997   startTimeSupervision(signal);
20998   return;
20999 }
21000 
sendRESTORABLE_GCI_REP(Signal * signal,Uint32 gci)21001 void Dblqh::sendRESTORABLE_GCI_REP(Signal *signal, Uint32 gci)
21002 {
21003   /**
21004    * Report completed GCI (one less than the one we are now saving), to
21005    * give the NDBCNTR block a chance to know when it is ready to cut the
21006    * log tails.
21007    */
21008   signal->theData[0] = gci - 1;
21009   if (is_first_instance())
21010   {
21011     jam();
21012     sendSignal(NDBCNTR_REF, GSN_RESTORABLE_GCI_REP, signal, 1, JBB);
21013   }
21014   /**
21015    * Report completed GCI (one less than the one we are now saving), to
21016    * give the Backup block a chance to remove old LCP files.
21017    * Without this signal arriving to Backup block the node restart will
21018    * be blocked waiting for the proper GCI to delete the old files
21019    * and also waiting for this to ensure that it will validate the
21020    * LCP control files.
21021    */
21022   signal->theData[0] = gci - 1;
21023   EXECUTE_DIRECT(BACKUP, GSN_RESTORABLE_GCI_REP, signal, 1);
21024 }
21025 
21026 void
write_local_sysfile_gcp_complete_done(Signal * signal)21027 Dblqh::write_local_sysfile_gcp_complete_done(Signal *signal)
21028 {
21029   if (c_send_gcp_saveref_needed)
21030   {
21031     jam();
21032     c_send_gcp_saveref_needed = false;
21033     GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
21034     saveRef->dihPtr = c_local_sysfile.m_dihPtr;
21035     saveRef->nodeId = getOwnNodeId();
21036     saveRef->gci    = c_local_sysfile.m_save_gci;
21037     saveRef->errorCode = GCPSaveRef::NodeRestartInProgress;
21038     sendSignal(c_local_sysfile.m_dihRef, GSN_GCP_SAVEREF, signal,
21039                GCPSaveRef::SignalLength, JBB);
21040     if (ERROR_INSERTED(5052))
21041     {
21042       jam();
21043       signal->theData[0] = 9999;
21044       sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 300, 1);
21045     }
21046   }
21047 }
21048 
21049 /**
21050  * This is needed for ndbmtd to serialize
21051  * SUB_GCP_COMPLETE_REP vs FIRE_TRIG_ORD
21052  */
21053 void
execSUB_GCP_COMPLETE_REP(Signal * signal)21054 Dblqh::execSUB_GCP_COMPLETE_REP(Signal* signal)
21055 {
21056   jamEntry();
21057   Uint32 len = signal->getLength();
21058   EXECUTE_DIRECT(DBTUP, GSN_SUB_GCP_COMPLETE_REP, signal, len);
21059   sendSignal(SUMA_REF, GSN_SUB_GCP_COMPLETE_REP, signal, len, JBB);
21060 }
21061 
21062 /* ------------------------------------------------------------------------- */
21063 /*  START TIME SUPERVISION OF THE LOG PARTS.                                 */
21064 /* ------------------------------------------------------------------------- */
startTimeSupervision(Signal * signal)21065 void Dblqh::startTimeSupervision(Signal* signal)
21066 {
21067   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
21068     jam();
21069     ptrAss(logPartPtr, logPartRecord);
21070 /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
21071 /* WE HAVE TO START CHECKING IF THE LOG IS TO BE WRITTEN EVEN IF PAGES ARE   */
21072 /* FULL. INITIALISE THE VALUES OF WHERE WE ARE IN THE LOG CURRENTLY.         */
21073 /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
21074     logPartPtr.p->logPartTimer = 0;
21075     logPartPtr.p->logTimer = 1;
21076     signal->theData[0] = ZTIME_SUPERVISION;
21077     signal->theData[1] = logPartPtr.i;
21078     sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
21079   }//for
21080 }//Dblqh::startTimeSupervision()
21081 
21082 /*---------------------------------------------------------------------------*/
21083 /* WE SET THE GLOBAL CHECKPOINT VARIABLES AFTER WRITING THE COMPLETED GCI LOG*/
21084 /* RECORD. THIS ENSURES THAT WE WILL ENCOUNTER THE COMPLETED GCI RECORD WHEN */
21085 /* WE EXECUTE THE FRAGMENT LOG.                                              */
21086 /*---------------------------------------------------------------------------*/
initGcpRecLab(Signal * signal)21087 void Dblqh::initGcpRecLab(Signal* signal)
21088 {
21089 /* ======================================================================== */
21090 /* =======               INITIATE GCP RECORD                        ======= */
21091 /*                                                                          */
21092 /*       SUBROUTINE SHORT NAME = IGR                                        */
21093 /* ======================================================================== */
21094   ndbrequire(clogPartFileSize <= NDB_MAX_LOG_PARTS);
21095   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
21096     jam();
21097     ptrAss(logPartPtr, logPartRecord);
21098 /*--------------------------------------------------*/
21099 /*       BY SETTING THE GCPREC = 0 WE START THE     */
21100 /*       CHECKING BY CHECK_GCP_COMPLETED. THIS      */
21101 /*       CHECKING MUST NOT BE STARTED UNTIL WE HAVE */
21102 /*       INSERTED ALL COMPLETE GCI LOG RECORDS IN   */
21103 /*       ALL LOG PARTS.                             */
21104 /*--------------------------------------------------*/
21105     logPartPtr.p->gcprec = 0;
21106     gcpPtr.p->gcpLogPartState[logPartPtr.i] = ZWAIT_DISK;
21107     gcpPtr.p->gcpSyncReady[logPartPtr.i] = ZFALSE;
21108     logFilePtr.i = logPartPtr.p->currentLogfile;
21109     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
21110     gcpPtr.p->gcpFilePtr[logPartPtr.i] = logFilePtr.i;
21111     logPagePtr.i = logFilePtr.p->currentLogpage;
21112     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
21113     if (logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] == ZPAGE_HEADER_SIZE) {
21114       jam();
21115 /*--------------------------------------------------*/
21116 /*       SINCE THE CURRENT FILEPAGE POINTS AT THE   */
21117 /*       NEXT WORD TO BE WRITTEN WE HAVE TO ADJUST  */
21118 /*       FOR THIS BY DECREASING THE FILE PAGE BY ONE*/
21119 /*       IF NO WORD HAS BEEN WRITTEN ON THE CURRENT */
21120 /*       FILEPAGE.                                  */
21121 /*--------------------------------------------------*/
21122       gcpPtr.p->gcpPageNo[logPartPtr.i] = logFilePtr.p->currentFilepage - 1;
21123       gcpPtr.p->gcpWordNo[logPartPtr.i] = ZPAGE_SIZE - 1;
21124     } else {
21125       jam();
21126       gcpPtr.p->gcpPageNo[logPartPtr.i] = logFilePtr.p->currentFilepage;
21127       gcpPtr.p->gcpWordNo[logPartPtr.i] =
21128 	logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] - 1;
21129     }//if
21130   }//for
21131   // initialize un-used part
21132   Uint32 Ti;
21133   for (Ti = clogPartFileSize; Ti < NDB_MAX_LOG_PARTS; Ti++) {
21134     gcpPtr.p->gcpFilePtr[Ti] = ZNIL;
21135     gcpPtr.p->gcpPageNo[Ti] = ZNIL;
21136     gcpPtr.p->gcpSyncReady[Ti] = FALSE;
21137     gcpPtr.p->gcpWordNo[Ti] = ZNIL;
21138   }
21139   return;
21140 }//Dblqh::initGcpRecLab()
21141 
21142 /* ========================================================================= */
21143 /* ==== CHECK IF ANY GLOBAL CHECKPOINTS ARE COMPLETED AFTER A COMPLETED===== */
21144 /*      DISK WRITE.                                                          */
21145 /*                                                                           */
21146 /*       SUBROUTINE SHORT NAME = CGC                                         */
21147 /* return: true if gcp was completed */
21148 /* ========================================================================= */
21149 bool
checkGcpCompleted(Signal * signal,Uint32 tcgcPageWritten,Uint32 tcgcWordWritten)21150 Dblqh::checkGcpCompleted(Signal* signal,
21151                          Uint32 tcgcPageWritten,
21152                          Uint32 tcgcWordWritten)
21153 {
21154   UintR tcgcFlag;
21155   UintR tcgcJ;
21156 
21157   gcpPtr.i = logPartPtr.p->gcprec;
21158   if (gcpPtr.i != RNIL)
21159   {
21160     jam();
21161     ndbrequire(logPartPtr.i < NDB_MAX_LOG_PARTS);
21162 /* ------------------------------------------------------------------------- */
21163 /* IF THE GLOBAL CHECKPOINT IS NOT WAITING FOR COMPLETION THEN WE CAN QUIT   */
21164 /* THE SEARCH IMMEDIATELY.                                                   */
21165 /* ------------------------------------------------------------------------- */
21166     ptrCheckGuard(gcpPtr, cgcprecFileSize, gcpRecord);
21167     if (gcpPtr.p->gcpFilePtr[logPartPtr.i] == logFilePtr.i) {
21168 /* ------------------------------------------------------------------------- */
21169 /* IF THE COMPLETED DISK OPERATION WAS ON ANOTHER FILE THAN THE ONE WE ARE   */
21170 /* WAITING FOR, THEN WE CAN ALSO QUIT THE SEARCH IMMEDIATELY.                */
21171 /* ------------------------------------------------------------------------- */
21172       if (tcgcPageWritten < gcpPtr.p->gcpPageNo[logPartPtr.i]) {
21173         jam();
21174 /* ------------------------------------------------------------------------- */
21175 /* THIS LOG PART HAVE NOT YET WRITTEN THE GLOBAL CHECKPOINT TO DISK.         */
21176 /* ------------------------------------------------------------------------- */
21177         return false;
21178       } else {
21179         if (tcgcPageWritten == gcpPtr.p->gcpPageNo[logPartPtr.i]) {
21180           if (tcgcWordWritten < gcpPtr.p->gcpWordNo[logPartPtr.i]) {
21181             jam();
21182 /* ------------------------------------------------------------------------- */
21183 /* THIS LOG PART HAVE NOT YET WRITTEN THE GLOBAL CHECKPOINT TO DISK.         */
21184 /* ------------------------------------------------------------------------- */
21185             return false;
21186           }//if
21187         }//if
21188       }//if
21189 /* ------------------------------------------------------------------------- */
21190 /* THIS LOG PART HAVE WRITTEN THE GLOBAL CHECKPOINT TO DISK.                 */
21191 /* ------------------------------------------------------------------------- */
21192       logPartPtr.p->gcprec = RNIL;
21193       gcpPtr.p->gcpLogPartState[logPartPtr.i] = ZON_DISK;
21194       tcgcFlag = ZTRUE;
21195       for (tcgcJ = 0; tcgcJ < clogPartFileSize; tcgcJ++)
21196       {
21197         jam();
21198         if (gcpPtr.p->gcpLogPartState[tcgcJ] != ZON_DISK) {
21199           jam();
21200 /* ------------------------------------------------------------------------- */
21201 /*ALL LOG PARTS HAVE NOT SAVED THIS GLOBAL CHECKPOINT TO DISK YET. WAIT FOR  */
21202 /*THEM TO COMPLETE.                                                          */
21203 /* ------------------------------------------------------------------------- */
21204           tcgcFlag = ZFALSE;
21205         }//if
21206       }//for
21207       if (tcgcFlag == ZFALSE)
21208       {
21209         return false;
21210       }
21211 
21212       if (tcgcFlag == ZTRUE)
21213       {
21214         jam();
21215 /* ------------------------------------------------------------------------- */
21216 /*WE HAVE FOUND A COMPLETED GLOBAL CHECKPOINT OPERATION. WE NOW NEED TO SEND */
21217 /*GCP_SAVECONF, REMOVE THE GCP RECORD FROM THE LIST OF WAITING GCP RECORDS   */
21218 /*ON THIS LOG PART AND RELEASE THE GCP RECORD.                               */
21219 // After changing the log implementation we need to perform a FSSYNCREQ on all
21220 // log files where the last log word resided first before proceeding.
21221 /* ------------------------------------------------------------------------- */
21222         UintR Ti;
21223         for (Ti = 0; Ti < clogPartFileSize; Ti++) {
21224           LogFileRecordPtr loopLogFilePtr;
21225           loopLogFilePtr.i = gcpPtr.p->gcpFilePtr[Ti];
21226           ptrCheckGuard(loopLogFilePtr, clogFileFileSize, logFileRecord);
21227           if (loopLogFilePtr.p->logFileStatus == LogFileRecord::OPEN) {
21228             jam();
21229             signal->theData[0] = loopLogFilePtr.p->fileRef;
21230             signal->theData[1] = cownref;
21231             signal->theData[2] = gcpPtr.p->gcpFilePtr[Ti];
21232             sendSignal(NDBFS_REF, GSN_FSSYNCREQ, signal, 3, JBA);
21233           } else {
21234             ndbrequire((loopLogFilePtr.p->logFileStatus ==
21235                         LogFileRecord::CLOSED) ||
21236                         (loopLogFilePtr.p->logFileStatus ==
21237                          LogFileRecord::CLOSING_WRITE_LOG) ||
21238                         (loopLogFilePtr.p->logFileStatus ==
21239                          LogFileRecord::OPENING_WRITE_LOG));
21240             signal->theData[0] = loopLogFilePtr.i;
21241             execFSSYNCCONF(signal);
21242           }//if
21243         }//for
21244       }//if
21245     }//if
21246     return true;
21247   }//if
21248   return false;
21249 }//Dblqh::checkGcpCompleted()
21250 
21251 void
execFSSYNCCONF(Signal * signal)21252 Dblqh::execFSSYNCCONF(Signal* signal)
21253 {
21254   GcpRecordPtr localGcpPtr;
21255   LogFileRecordPtr localLogFilePtr;
21256   LogPartRecordPtr localLogPartPtr;
21257   localLogFilePtr.i = signal->theData[0];
21258   ptrCheckGuard(localLogFilePtr, clogFileFileSize, logFileRecord);
21259   localLogPartPtr.i = localLogFilePtr.p->logPartRec;
21260   ptrCheckGuard(localLogPartPtr, clogPartFileSize, logPartRecord);
21261   localGcpPtr.i = ccurrentGcprec;
21262   ptrCheckGuard(localGcpPtr, cgcprecFileSize, gcpRecord);
21263   localGcpPtr.p->gcpSyncReady[localLogPartPtr.i] = ZTRUE;
21264   UintR Ti;
21265 
21266   if (DEBUG_REDO)
21267   {
21268     ndbout_c("part: %u file: %u gci: %u SYNC CONF",
21269              localLogPartPtr.p->logPartNo,
21270              localLogFilePtr.p->fileNo,
21271              localGcpPtr.p->gcpId);
21272   }
21273   for (Ti = 0; Ti < clogPartFileSize; Ti++) {
21274     jam();
21275     if (localGcpPtr.p->gcpSyncReady[Ti] == ZFALSE) {
21276       jam();
21277       return;
21278     }//if
21279   }//for
21280 
21281 #ifdef GCP_TIMER_HACK
21282   globalData.gcp_timer_save[1] = NdbTick_getCurrentTicks();
21283 #endif
21284 
21285   GCPSaveConf * const saveConf = (GCPSaveConf *)&signal->theData[0];
21286   saveConf->dihPtr = localGcpPtr.p->gcpUserptr;
21287   saveConf->nodeId = getOwnNodeId();
21288   saveConf->gci    = localGcpPtr.p->gcpId;
21289   ndbrequire(refToMain(localGcpPtr.p->gcpBlockref) == DBDIH ||
21290              refToMain(localGcpPtr.p->gcpBlockref) == DBLQH);
21291   sendSignal(localGcpPtr.p->gcpBlockref, GSN_GCP_SAVECONF, signal,
21292 	     GCPSaveConf::SignalLength, JBA);
21293   ccurrentGcprec = RNIL;
21294 }//Dblqh::execFSSYNCCONF()
21295 
21296 
21297 /* ######################################################################### */
21298 /* #######                            FILE HANDLING MODULE           ####### */
21299 /*                                                                           */
21300 /* ######################################################################### */
21301 /*       THIS MODULE HANDLES RESPONSE MESSAGES FROM THE FILE SYSTEM          */
21302 /* ######################################################################### */
21303 /* ######################################################################### */
21304 /*       SIGNAL RECEPTION MODULE                                             */
21305 /*       THIS MODULE IS A SUB-MODULE OF THE FILE SYSTEM HANDLING.            */
21306 /*                                                                           */
21307 /*  THIS MODULE CHECKS THE STATE AND JUMPS TO THE PROPER PART OF THE FILE    */
21308 /*  HANDLING MODULE.                                                         */
21309 /* ######################################################################### */
21310 /* *************** */
21311 /*  FSCLOSECONF  > */
21312 /* *************** */
execFSCLOSECONF(Signal * signal)21313 void Dblqh::execFSCLOSECONF(Signal* signal)
21314 {
21315   jamEntry();
21316   logFilePtr.i = signal->theData[0];
21317   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
21318   logFilePtr.p->fileRef = RNIL;
21319 
21320   if (DEBUG_REDO)
21321   {
21322     logPartPtr.i = logFilePtr.p->logPartRec;
21323     ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
21324     ndbout_c("part: %u file: %u CLOSE CONF",
21325              logPartPtr.p->logPartNo,
21326              logFilePtr.p->fileNo);
21327   }
21328 
21329   switch (logFilePtr.p->logFileStatus) {
21330   case LogFileRecord::CLOSE_SR_READ_INVALIDATE_PAGES:
21331     jam();
21332     logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
21333 
21334     logPartPtr.i = logFilePtr.p->logPartRec;
21335     ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
21336 
21337     readFileInInvalidate(signal, 2);
21338     return;
21339 
21340   case LogFileRecord::CLOSE_SR_READ_INVALIDATE_SEARCH_FILES:
21341     jam();
21342     logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
21343 
21344     logPartPtr.i = logFilePtr.p->logPartRec;
21345     ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
21346 
21347     readFileInInvalidate(signal, 4);
21348     return;
21349   case LogFileRecord::CLOSE_SR_READ_INVALIDATE_SEARCH_LAST_FILE:
21350     logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
21351 
21352     logPartPtr.i = logFilePtr.p->logPartRec;
21353     ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
21354 
21355     readFileInInvalidate(signal, 7);
21356     return;
21357   case LogFileRecord::CLOSE_SR_WRITE_INVALIDATE_PAGES:
21358     jam();
21359     logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
21360 
21361     logPartPtr.i = logFilePtr.p->logPartRec;
21362     ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
21363 
21364     writeFileInInvalidate(signal, 1);
21365     return;
21366   case LogFileRecord::CLOSING_INIT:
21367     jam();
21368     logFileInitDone++ ;
21369     closingInitLab(signal);
21370     return;
21371   case LogFileRecord::CLOSING_SR:
21372     jam();
21373     closingSrLab(signal);
21374     return;
21375   case LogFileRecord::CLOSING_EXEC_SR:
21376     jam();
21377     closeExecSrLab(signal);
21378     return;
21379   case LogFileRecord::CLOSING_EXEC_SR_COMPLETED:
21380     jam();
21381     closeExecSrCompletedLab(signal);
21382     return;
21383   case LogFileRecord::CLOSING_WRITE_LOG:
21384     jam();
21385     closeWriteLogLab(signal);
21386     return;
21387   case LogFileRecord::CLOSING_EXEC_LOG:
21388     jam();
21389     closeExecLogLab(signal);
21390     return;
21391 #ifndef NO_REDO_OPEN_FILE_CACHE
21392   case LogFileRecord::CLOSING_EXEC_LOG_CACHED:
21393     jam();
21394     logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
21395     release(signal, m_redo_open_file_cache);
21396     return;
21397 #endif
21398   case LogFileRecord::CLOSING_SR_FRONTPAGE:
21399     jam();
21400     closingSrFrontPage(signal);
21401     return;
21402   default:
21403     jam();
21404     systemErrorLab(signal, __LINE__);
21405     return;
21406   }//switch
21407 }//Dblqh::execFSCLOSECONF()
21408 
21409 
21410 /* ************>> */
21411 /*  FSOPENCONF  > */
21412 /* ************>> */
execFSOPENCONF(Signal * signal)21413 void Dblqh::execFSOPENCONF(Signal* signal)
21414 {
21415   jamEntry();
21416 
21417 #ifdef ERROR_INSERT
21418   if (delayOpenFilePtrI > 0 && signal->theData[0] == delayOpenFilePtrI)
21419   {
21420     /* ERROR_INSERT 5090 : delay executing FSOPENCONF by sending
21421      * GSN_CONTINUEB in order to simulate a delay in opening a redo log file.
21422      * theData[0] of FSOPENCONF contains the LogFilePtr.i of the delayed file.
21423      * Add ZDELAY_FS_OPEN to theData[0] in addition to LogFilePtr.i,
21424      * in order to hint CONTINUEB to handle this signal.
21425      */
21426     Uint32 compact = signal->theData[0];
21427     signal->theData[0] = compact | (Uint32)ZDELAY_FS_OPEN <<16 ;
21428     sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
21429     return;
21430   }
21431 #endif
21432 
21433   initFsopenconf(signal);
21434   switch (logFilePtr.p->logFileStatus) {
21435   case LogFileRecord::OPEN_SR_READ_INVALIDATE_PAGES:
21436     jam();
21437     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21438     readFileInInvalidate(signal, 0);
21439     return;
21440   case LogFileRecord::OPEN_SR_READ_INVALIDATE_SEARCH_FILES:
21441     jam();
21442     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21443     readFileInInvalidate(signal, 5);
21444     return;
21445   case LogFileRecord::OPEN_SR_WRITE_INVALIDATE_PAGES:
21446     jam();
21447     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21448     writeFileInInvalidate(signal, 0);
21449     return;
21450   case LogFileRecord::OPENING_INIT:
21451     jam();
21452     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21453     openFileInitLab(signal);
21454     return;
21455   case LogFileRecord::OPEN_SR_FRONTPAGE:
21456     jam();
21457     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21458     openSrFrontpageLab(signal);
21459     return;
21460   case LogFileRecord::OPEN_SR_LAST_FILE:
21461     jam();
21462     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21463     openSrLastFileLab(signal);
21464     return;
21465   case LogFileRecord::OPEN_SR_NEXT_FILE:
21466     jam();
21467     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21468     openSrNextFileLab(signal);
21469     return;
21470   case LogFileRecord::OPEN_EXEC_SR_START:
21471     jam();
21472     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21473     openExecSrStartLab(signal);
21474     return;
21475   case LogFileRecord::OPEN_EXEC_SR_NEW_MBYTE:
21476     jam();
21477     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21478     openExecSrNewMbyteLab(signal);
21479     return;
21480   case LogFileRecord::OPEN_SR_FOURTH_PHASE:
21481     jam();
21482     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21483     openSrFourthPhaseLab(signal);
21484     return;
21485   case LogFileRecord::OPEN_SR_FOURTH_NEXT:
21486     jam();
21487     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21488     openSrFourthNextLab(signal);
21489     return;
21490   case LogFileRecord::OPEN_SR_FOURTH_ZERO:
21491     jam();
21492     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21493     openSrFourthZeroLab(signal);
21494     return;
21495   case LogFileRecord::OPENING_WRITE_LOG:
21496     jam();
21497     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21498     return;
21499   case LogFileRecord::OPEN_EXEC_LOG:
21500     jam();
21501     logFilePtr.p->logFileStatus = LogFileRecord::OPEN;
21502 #ifndef NO_REDO_OPEN_FILE_CACHE
21503     {
21504       jam();
21505       m_redo_open_file_cache.m_lru.addFirst(logFilePtr);
21506     }
21507     // Fall through
21508   case LogFileRecord::OPEN_EXEC_LOG_CACHED:
21509     jam();
21510 #endif
21511     openExecLogLab(signal);
21512     return;
21513   default:
21514     jam();
21515     systemErrorLab(signal, __LINE__);
21516     return;
21517   }//switch
21518 }//Dblqh::execFSOPENCONF()
21519 
21520 void
execFSOPENREF(Signal * signal)21521 Dblqh::execFSOPENREF(Signal* signal)
21522 {
21523   jamEntry();
21524   FsRef* ref = (FsRef*)signal->getDataPtr();
21525   Uint32 err = ref->errorCode;
21526   if (err == FsRef::fsErrInvalidFileSize)
21527   {
21528     char buf[256];
21529     BaseString::snprintf(buf, sizeof(buf),
21530                          "Invalid file size for redo logfile, "
21531                          " size only changable with --initial");
21532     progError(__LINE__,
21533               NDBD_EXIT_INVALID_CONFIG,
21534               buf);
21535     return;
21536   }
21537 
21538   SimulatedBlock::execFSOPENREF(signal);
21539 }
21540 
21541 /* ************>> */
21542 /*  FSREADCONF  > */
21543 /* ************>> */
execFSREADCONF(Signal * signal)21544 void Dblqh::execFSREADCONF(Signal* signal)
21545 {
21546   jamEntry();
21547   initFsrwconf(signal, false);
21548 
21549   switch (lfoPtr.p->lfoState) {
21550   case LogFileOperationRecord::READ_SR_LAST_MBYTE:
21551     jam();
21552     releaseLfo(signal);
21553     readSrLastMbyteLab(signal);
21554     return;
21555   case LogFileOperationRecord::READ_SR_FRONTPAGE:
21556     jam();
21557     releaseLfo(signal);
21558     readSrFrontpageLab(signal);
21559     return;
21560   case LogFileOperationRecord::READ_SR_LAST_FILE:
21561     jam();
21562     releaseLfo(signal);
21563     readSrLastFileLab(signal);
21564     return;
21565   case LogFileOperationRecord::READ_SR_NEXT_FILE:
21566     jam();
21567     releaseLfo(signal);
21568     readSrNextFileLab(signal);
21569     return;
21570   case LogFileOperationRecord::READ_EXEC_SR:
21571     jam();
21572     readExecSrLab(signal);
21573     return;
21574   case LogFileOperationRecord::READ_EXEC_LOG:
21575     jam();
21576     readExecLogLab(signal);
21577     return;
21578   case LogFileOperationRecord::READ_SR_INVALIDATE_PAGES:
21579     jam();
21580     invalidateLogAfterLastGCI(signal);
21581     return;
21582   case LogFileOperationRecord::READ_SR_INVALIDATE_SEARCH_FILES:
21583     jam();
21584     invalidateLogAfterLastGCI(signal);
21585     return;
21586   case LogFileOperationRecord::READ_SR_FOURTH_PHASE:
21587     jam();
21588     releaseLfo(signal);
21589     readSrFourthPhaseLab(signal);
21590     return;
21591   case LogFileOperationRecord::READ_SR_FOURTH_ZERO:
21592     jam();
21593     releaseLfo(signal);
21594     readSrFourthZeroLab(signal);
21595     return;
21596   default:
21597     jam();
21598     systemErrorLab(signal, __LINE__);
21599     return;
21600   }//switch
21601 }//Dblqh::execFSREADCONF()
21602 
21603 /* ************>> */
/*  FSREADREF   > */
21605 /* ************>> */
execFSREADREF(Signal * signal)21606 void Dblqh::execFSREADREF(Signal* signal)
21607 {
21608   jamEntry();
21609   lfoPtr.i = signal->theData[0];
21610   ptrCheckGuard(lfoPtr, clfoFileSize, logFileOperationRecord);
21611   switch (lfoPtr.p->lfoState) {
21612   case LogFileOperationRecord::READ_SR_LAST_MBYTE:
21613     jam();
21614     break;
21615   case LogFileOperationRecord::READ_SR_FRONTPAGE:
21616     jam();
21617     break;
21618   case LogFileOperationRecord::READ_SR_LAST_FILE:
21619     jam();
21620     break;
21621   case LogFileOperationRecord::READ_SR_NEXT_FILE:
21622     jam();
21623     break;
21624   case LogFileOperationRecord::READ_EXEC_SR:
21625     jam();
21626     break;
21627   case LogFileOperationRecord::READ_EXEC_LOG:
21628     jam();
21629     break;
21630   case LogFileOperationRecord::READ_SR_FOURTH_PHASE:
21631     jam();
21632     break;
21633   case LogFileOperationRecord::READ_SR_FOURTH_ZERO:
21634     jam();
21635     break;
21636   case LogFileOperationRecord::READ_SR_INVALIDATE_PAGES:
21637     jam();
21638     break;
21639   default:
21640     jam();
21641     break;
21642   }//switch
21643   {
21644     char msg[100];
21645     sprintf(msg, "File system read failed during LogFileOperationRecord state %d", (Uint32)lfoPtr.p->lfoState);
21646     fsRefError(signal,__LINE__,msg);
21647   }
21648 }//Dblqh::execFSREADREF()
21649 
21650 /* *************** */
21651 /*  FSWRITECONF  > */
21652 /* *************** */
execFSWRITECONF(Signal * signal)21653 void Dblqh::execFSWRITECONF(Signal* signal)
21654 {
21655   jamEntry();
21656   initFsrwconf(signal, true);
21657   switch (lfoPtr.p->lfoState) {
21658   case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES:
21659     jam();
21660     invalidateLogAfterLastGCI(signal);
21661     CRASH_INSERTION(5047);
21662     return;
21663   case LogFileOperationRecord::WRITE_PAGE_ZERO:
21664     jam();
21665     writePageZeroLab(signal, __LINE__);
21666     releaseLfo(signal);
21667     return;
21668   case LogFileOperationRecord::LAST_WRITE_IN_FILE:
21669     jam();
21670     lastWriteInFileLab(signal);
21671     return;
21672   case LogFileOperationRecord::INIT_WRITE_AT_END:
21673     jam();
21674     initWriteEndLab(signal);
21675     return;
21676   case LogFileOperationRecord::INIT_FIRST_PAGE:
21677     jam();
21678     logMBytesInitDone++;
21679     initFirstPageLab(signal);
21680     return;
21681   case LogFileOperationRecord::WRITE_GCI_ZERO:
21682     jam();
21683     writeGciZeroLab(signal);
21684     return;
21685   case LogFileOperationRecord::WRITE_DIRTY:
21686     jam();
21687     writeDirtyLab(signal);
21688     return;
21689   case LogFileOperationRecord::WRITE_INIT_MBYTE:
21690     jam();
21691     logMBytesInitDone++;
21692     writeInitMbyteLab(signal);
21693     return;
21694   case LogFileOperationRecord::ACTIVE_WRITE_LOG:
21695     jam();
21696     writeLogfileLab(signal);
21697     return;
21698   case LogFileOperationRecord::FIRST_PAGE_WRITE_IN_LOGFILE:
21699     jam();
21700     firstPageWriteLab(signal);
21701     return;
21702   case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES_UPDATE_PAGE0:
21703     jam();
21704     // We are done...send completed signal and exit this phase.
21705     releaseLfo(signal);
21706     signal->theData[0] = ZSR_FOURTH_COMP;
21707     signal->theData[1] = logPartPtr.i;
21708     sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
21709     return;
21710   default:
21711     jam();
21712     systemErrorLab(signal, __LINE__);
21713     return;
21714   }//switch
21715 }//Dblqh::execFSWRITECONF()
21716 
21717 /* ************>> */
21718 /*  FSWRITEREF  > */
21719 /* ************>> */
execFSWRITEREF(Signal * signal)21720 void Dblqh::execFSWRITEREF(Signal* signal)
21721 {
21722   jamEntry();
21723   lfoPtr.i = signal->theData[0];
21724   ptrCheckGuard(lfoPtr, clfoFileSize, logFileOperationRecord);
21725   terrorCode = signal->theData[1];
21726   switch (lfoPtr.p->lfoState) {
21727   case LogFileOperationRecord::WRITE_PAGE_ZERO:
21728     jam();
21729     break;
21730   case LogFileOperationRecord::LAST_WRITE_IN_FILE:
21731     jam();
21732     break;
21733   case LogFileOperationRecord::INIT_WRITE_AT_END:
21734     jam();
21735     break;
21736   case LogFileOperationRecord::INIT_FIRST_PAGE:
21737     jam();
21738     break;
21739   case LogFileOperationRecord::WRITE_GCI_ZERO:
21740     jam();
21741     break;
21742   case LogFileOperationRecord::WRITE_DIRTY:
21743     jam();
21744     break;
21745   case LogFileOperationRecord::WRITE_INIT_MBYTE:
21746     jam();
21747     break;
21748   case LogFileOperationRecord::ACTIVE_WRITE_LOG:
21749     jam();
21750     break;
21751   case LogFileOperationRecord::FIRST_PAGE_WRITE_IN_LOGFILE:
21752     jam();
21753     break;
21754   case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES:
21755     jam();
21756     systemErrorLab(signal, __LINE__);
21757   default:
21758     jam();
21759     break;
21760   }//switch
21761   {
21762     char msg[100];
21763     sprintf(msg, "File system write failed during LogFileOperationRecord state %d", (Uint32)lfoPtr.p->lfoState);
21764     fsRefError(signal,__LINE__,msg);
21765   }
21766 }//Dblqh::execFSWRITEREF()
21767 
21768 
21769 /* ========================================================================= */
21770 /* =======              INITIATE WHEN RECEIVING FSOPENCONF           ======= */
21771 /*                                                                           */
21772 /* ========================================================================= */
initFsopenconf(Signal * signal)21773 void Dblqh::initFsopenconf(Signal* signal)
21774 {
21775   logFilePtr.i = signal->theData[0];
21776   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
21777   logFilePtr.p->fileRef = signal->theData[1];
21778   logPartPtr.i = logFilePtr.p->logPartRec;
21779   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
21780   logFilePtr.p->currentMbyte = 0;
21781   logFilePtr.p->filePosition = 0;
21782 
21783   if (logFilePtr.p->fileChangeState == LogFileRecord::WAIT_FOR_OPEN_NEXT_FILE ||
21784       logFilePtr.p->fileChangeState == LogFileRecord::LAST_FILEWRITE_WAITS ||
21785       logFilePtr.p->fileChangeState == LogFileRecord::FIRST_FILEWRITE_WAITS)
21786   {
21787     jam();
21788     logPagePtr.i = logFilePtr.p->currentLogpage;
21789     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
21790     writeFileHeaderOpen(signal, ZNORMAL);
21791     openNextLogfile(signal);
21792 
21793     if (logFilePtr.p->fileChangeState == LogFileRecord::WAIT_FOR_OPEN_NEXT_FILE)
21794       logFilePtr.p->fileChangeState = LogFileRecord::BOTH_WRITES_ONGOING;
21795     else if (logFilePtr.p->fileChangeState == LogFileRecord::LAST_FILEWRITE_WAITS)
21796       logFilePtr.p->fileChangeState = LogFileRecord::FIRST_WRITE_ONGOING;
21797     else if (logFilePtr.p->fileChangeState == LogFileRecord::FIRST_FILEWRITE_WAITS)
21798       logFilePtr.p->fileChangeState = LogFileRecord::LAST_WRITE_ONGOING;
21799   }
21800 
21801 }//Dblqh::initFsopenconf()
21802 
21803 /* ========================================================================= */
21804 /* =======       INITIATE WHEN RECEIVING FSREADCONF AND FSWRITECONF  ======= */
21805 /*                                                                           */
21806 /* ========================================================================= */
initFsrwconf(Signal * signal,bool write)21807 void Dblqh::initFsrwconf(Signal* signal, bool write)
21808 {
21809   LogPageRecordPtr logP;
21810   Uint32 noPages, totPages;
21811   lfoPtr.i = signal->theData[0];
21812   ptrCheckGuard(lfoPtr, clfoFileSize, logFileOperationRecord);
21813   totPages= lfoPtr.p->noPagesRw;
21814   logFilePtr.i = lfoPtr.p->logFileRec;
21815   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
21816   logPartPtr.i = logFilePtr.p->logPartRec;
21817   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
21818   logPagePtr.i = lfoPtr.p->firstLfoPage;
21819   ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
21820   logP= logPagePtr;
21821   noPages= 1;
21822   ndbassert(totPages > 0);
21823 
21824   if (write)
21825   {
21826     Uint32 bytesWritten = totPages * 32768;
21827     logPartPtr.p->m_io_tracker.complete_io(bytesWritten);
21828   }
21829 
21830   for (;;)
21831   {
21832     logP.p->logPageWord[ZPOS_IN_WRITING]= 0;
21833     logP.p->logPageWord[ZPOS_IN_FREE_LIST]= 0;
21834     if (noPages == totPages)
21835       return;
21836     if (write)
21837       logP.i= logP.p->logPageWord[ZNEXT_PAGE];
21838     else
21839       logP.i= lfoPtr.p->logPageArray[noPages];
21840     ptrCheckGuard(logP, clogPageFileSize, logPageRecord);
21841     noPages++;
21842   }
21843 
21844 }//Dblqh::initFsrwconf()
21845 
21846 /* ######################################################################### */
21847 /*       NORMAL OPERATION MODULE                                             */
21848 /*       THIS MODULE IS A SUB-MODULE OF THE FILE SYSTEM HANDLING.            */
21849 /*                                                                           */
21850 /*   THIS PART HANDLES THE NORMAL OPENING, CLOSING AND WRITING OF LOG FILES  */
21851 /*   DURING NORMAL OPERATION.                                                */
21852 /* ######################################################################### */
21853 /*---------------------------------------------------------------------------*/
21854 /* THIS SIGNAL IS USED TO SUPERVISE THAT THE LOG RECORDS ARE NOT KEPT IN MAIN*/
21855 /* MEMORY FOR MORE THAN 1 SECOND TO ACHIEVE THE PROPER RELIABILITY.          */
21856 /*---------------------------------------------------------------------------*/
timeSup(Signal * signal)21857 void Dblqh::timeSup(Signal* signal)
21858 {
21859   LogPageRecordPtr origLogPagePtr;
21860   Uint32 wordWritten;
21861 
21862   jamEntry();
21863   logPartPtr.i = signal->theData[0];
21864   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
21865   logFilePtr.i = logPartPtr.p->currentLogfile;
21866   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
21867   logPagePtr.i = logFilePtr.p->currentLogpage;
21868   ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
21869   if (logPartPtr.p->logPartTimer != logPartPtr.p->logTimer) {
21870     jam();
21871     if (true) // less merge conflicts
21872     {
21873 /*---------------------------------------------------------------------------*/
21874 /* IDLE AND NOT WRITTEN TO DISK IN A SECOND. ALSO WHEN WE HAVE A TAIL PROBLEM*/
21875 /* WE HAVE TO WRITE TO DISK AT TIMES. WE WILL FIRST CHECK WHETHER ANYTHING   */
21876 /* AT ALL HAVE BEEN WRITTEN TO THE PAGES BEFORE WRITING TO DISK.             */
21877 /*---------------------------------------------------------------------------*/
21878 /* WE HAVE TO WRITE TO DISK IN ALL CASES SINCE THERE COULD BE INFORMATION    */
21879 /* STILL IN THE LOG THAT WAS GENERATED BEFORE THE PREVIOUS TIME SUPERVISION  */
21880 /* BUT AFTER THE LAST DISK WRITE. THIS PREVIOUSLY STOPPED ALL DISK WRITES    */
21881 /* WHEN NO MORE LOG WRITES WERE PERFORMED (THIS HAPPENED WHEN LOG GOT FULL   */
21882 /* AND AFTER LOADING THE INITIAL RECORDS IN INITIAL START).                  */
21883 /*---------------------------------------------------------------------------*/
21884       if (((logFilePtr.p->currentFilepage + 1) & (ZPAGES_IN_MBYTE -1)) == 0) {
21885         jam();
21886 /*---------------------------------------------------------------------------*/
21887 /* THIS IS THE LAST PAGE IN THIS MBYTE. WRITE NEXT LOG AND SWITCH TO NEXT    */
21888 /* MBYTE.                                                                    */
21889 /*---------------------------------------------------------------------------*/
21890         changeMbyte(signal);
21891       } else {
21892 /*---------------------------------------------------------------------------*/
21893 /* WRITE THE LOG PAGE TO DISK EVEN IF IT IS NOT FULL. KEEP PAGE AND WRITE A  */
21894 /* COPY. THE ORIGINAL PAGE WILL BE WRITTEN AGAIN LATER ON.                   */
21895 /*---------------------------------------------------------------------------*/
21896         wordWritten = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] - 1;
21897         origLogPagePtr.i = logPagePtr.i;
21898         origLogPagePtr.p = logPagePtr.p;
21899         seizeLogpage(signal);
21900         MEMCOPY_NO_WORDS(&logPagePtr.p->logPageWord[0],
21901                          &origLogPagePtr.p->logPageWord[0],
21902                          wordWritten + 1);
21903         ndbrequire(wordWritten < ZPAGE_SIZE);
21904         if (logFilePtr.p->noLogpagesInBuffer > 0) {
21905           jam();
21906           completedLogPage(signal, ZENFORCE_WRITE, __LINE__);
21907 /*---------------------------------------------------------------------------*/
21908 /*SINCE WE ARE ONLY WRITING PART OF THE LAST PAGE WE HAVE TO UPDATE THE WORD */
21909 /*WRITTEN TO REFLECT THE REAL LAST WORD WRITTEN. WE ALSO HAVE TO MOVE THE    */
21910 /*FILE POSITION ONE STEP BACKWARDS SINCE WE ARE NOT WRITING THE LAST PAGE    */
21911 /*COMPLETELY. IT WILL BE WRITTEN AGAIN.                                      */
21912 /*---------------------------------------------------------------------------*/
21913           lfoPtr.p->lfoWordWritten = wordWritten;
21914           logFilePtr.p->filePosition = logFilePtr.p->filePosition - 1;
21915         } else {
21916           if (wordWritten == (ZPAGE_HEADER_SIZE - 1)) {
21917 /*---------------------------------------------------------------------------*/
21918 /*THIS IS POSSIBLE BUT VERY UNLIKELY. IF THE PAGE WAS COMPLETED AFTER THE LAST*/
21919 /*WRITE TO DISK THEN NO_LOG_PAGES_IN_BUFFER > 0 AND IF NOT WRITTEN SINCE LAST*/
21920 /*WRITE TO DISK THEN THE PREVIOUS PAGE MUST HAVE BEEN WRITTEN BY SOME        */
21921 /*OPERATION AND THAT BECAME COMPLETELY FULL. IN ANY CASE WE NEED NOT WRITE AN*/
21922 /*EMPTY PAGE TO DISK.                                                        */
21923 /*---------------------------------------------------------------------------*/
21924             jam();
21925             releaseLogpage(signal);
21926           } else {
21927             jam();
21928             writeSinglePage(signal, logFilePtr.p->currentFilepage,
21929                             wordWritten, __LINE__);
21930             lfoPtr.p->lfoState = LogFileOperationRecord::ACTIVE_WRITE_LOG;
21931           }//if
21932         }//if
21933       }//if
21934     }
21935   }
21936 
21937   logPartPtr.p->logTimer++;
21938   return;
21939 }//Dblqh::timeSup()
21940 
writeLogfileLab(Signal * signal)21941 void Dblqh::writeLogfileLab(Signal* signal)
21942 {
21943 /*---------------------------------------------------------------------------*/
21944 /* CHECK IF ANY GLOBAL CHECKPOINTS ARE COMPLETED DUE TO THIS COMPLETED DISK  */
21945 /* WRITE.                                                                    */
21946 /*---------------------------------------------------------------------------*/
21947   switch (logFilePtr.p->fileChangeState) {
21948   case LogFileRecord::NOT_ONGOING:
21949     jam();
21950     checkGcpCompleted(signal,
21951                       ((lfoPtr.p->lfoPageNo + lfoPtr.p->noPagesRw) - 1),
21952                       lfoPtr.p->lfoWordWritten);
21953     break;
21954 #if 0
21955   case LogFileRecord::BOTH_WRITES_ONGOING:
21956     jam();
21957     ndbout_c("not crashing!!");
21958     // Fall-through
21959 #endif
21960   case LogFileRecord::WRITE_PAGE_ZERO_ONGOING:
21961   case LogFileRecord::LAST_WRITE_ONGOING:
21962     jam();
21963     logFilePtr.p->lastPageWritten = (lfoPtr.p->lfoPageNo + lfoPtr.p->noPagesRw) - 1;
21964     logFilePtr.p->lastWordWritten = lfoPtr.p->lfoWordWritten;
21965     break;
21966   default:
21967     jam();
21968     systemErrorLab(signal, __LINE__);
21969     return;
21970     break;
21971   }//switch
21972   releaseLfoPages(signal);
21973   releaseLfo(signal);
21974   return;
21975 }//Dblqh::writeLogfileLab()
21976 
closeWriteLogLab(Signal * signal)21977 void Dblqh::closeWriteLogLab(Signal* signal)
21978 {
21979   logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
21980   return;
21981 }//Dblqh::closeWriteLogLab()
21982 
21983 /* ######################################################################### */
21984 /*       FILE CHANGE MODULE                                                  */
21985 /*       THIS MODULE IS A SUB-MODULE OF THE FILE SYSTEM HANDLING.            */
21986 /*                                                                           */
21987 /*THIS PART OF THE FILE MODULE HANDLES WHEN WE ARE CHANGING LOG FILE DURING  */
21988 /*NORMAL OPERATION. WE HAVE TO BE CAREFUL WHEN WE ARE CHANGING LOG FILE SO   */
21989 /*THAT WE DO NOT COMPLICATE THE SYSTEM RESTART PROCESS TOO MUCH.             */
21990 /*THE IDEA IS THAT WE START BY WRITING THE LAST WRITE IN THE OLD FILE AND WE */
21991 /*ALSO WRITE THE FIRST PAGE OF THE NEW FILE CONCURRENT WITH THAT. THIS FIRST */
21992 /*PAGE IN THE NEW FILE DO NOT CONTAIN ANY LOG RECORDS OTHER THAN A DESCRIPTOR*/
21993 /*CONTAINING INFORMATION ABOUT GCI'S NEEDED AT SYSTEM RESTART AND A NEXT LOG */
21994 /*RECORD.                                                                    */
21995 /*                                                                           */
21996 /*WHEN BOTH OF THOSE WRITES HAVE COMPLETED WE ALSO WRITE PAGE ZERO IN FILE   */
21997 /*ZERO. THE ONLY INFORMATION WHICH IS INTERESTING HERE IS THE NEW FILE NUMBER*/
21998 /*                                                                           */
21999 /*IF OPTIMISATIONS ARE NEEDED OF THE LOG HANDLING THEN IT IS POSSIBLE TO     */
/*AVOID WRITING THE FIRST PAGE OF THE NEW FILE IMMEDIATELY. THIS COMPLICATES */
22001 /*THE SYSTEM RESTART AND ONE HAS TO TAKE SPECIAL CARE WITH FILE ZERO. IT IS  */
22002 /*HOWEVER NO LARGE PROBLEM TO CHANGE INTO THIS SCENARIO. TO AVOID ALSO THE   */
22003 /*WRITING OF PAGE ZERO IS ALSO POSSIBLE BUT COMPLICATES THE DESIGN EVEN      */
22004 /*FURTHER. IT GETS FAIRLY COMPLEX TO FIND THE END OF THE LOG. SOME SORT OF   */
22005 /*BINARY SEARCH IS HOWEVER MOST LIKELY A GOOD METHODOLOGY FOR THIS.          */
22006 /* ######################################################################### */
firstPageWriteLab(Signal * signal)22007 void Dblqh::firstPageWriteLab(Signal* signal)
22008 {
22009   releaseLfo(signal);
22010 /*---------------------------------------------------------------------------*/
22011 /*       RELEASE PAGE ZERO IF THE FILE IS NOT FILE 0.                        */
22012 /*---------------------------------------------------------------------------*/
22013   Uint32 fileNo = logFilePtr.p->fileNo;
22014   if (fileNo != 0) {
22015     jam();
22016     releaseLogpage(signal);
22017   }//if
22018 /*---------------------------------------------------------------------------*/
22019 /* IF A NEW FILE HAS BEEN OPENED WE SHALL ALWAYS ALSO WRITE TO PAGE O IN     */
22020 /* FILE 0. THE AIM IS TO MAKE RESTARTS EASIER BY SPECIFYING WHICH IS THE     */
22021 /* LAST FILE WHERE LOGGING HAS STARTED.                                      */
22022 /*---------------------------------------------------------------------------*/
22023 /* FIRST CHECK WHETHER THE NEXT FILE IS OPENED AND THEN                      */
22024 /* THE LAST WRITE IN THE PREVIOUS FILE HAVE COMPLETED                        */
22025 /*---------------------------------------------------------------------------*/
22026   if (logFilePtr.p->fileChangeState == LogFileRecord::WAIT_FOR_OPEN_NEXT_FILE)
22027   {
22028     jam();
22029     logFilePtr.p->fileChangeState = LogFileRecord::FIRST_FILEWRITE_WAITS;
22030     return;
22031   }
22032   else if (logFilePtr.p->fileChangeState == LogFileRecord::BOTH_WRITES_ONGOING) {
22033     jam();
22034 /*---------------------------------------------------------------------------*/
22035 /* THE LAST WRITE WAS STILL ONGOING.                                         */
22036 /*---------------------------------------------------------------------------*/
22037     logFilePtr.p->fileChangeState = LogFileRecord::LAST_WRITE_ONGOING;
22038     return;
22039   } else {
22040     jam();
22041     ndbrequire(logFilePtr.p->fileChangeState == LogFileRecord::FIRST_WRITE_ONGOING);
22042 /*---------------------------------------------------------------------------*/
22043 /* WRITE TO PAGE 0 IN IN FILE 0 NOW.                                         */
22044 /*---------------------------------------------------------------------------*/
22045     logFilePtr.p->fileChangeState = LogFileRecord::WRITE_PAGE_ZERO_ONGOING;
22046     if (fileNo == 0) {
22047       jam();
22048 /*---------------------------------------------------------------------------*/
22049 /* IF THE NEW FILE WAS 0 THEN WE HAVE ALREADY WRITTEN PAGE ZERO IN FILE 0.   */
22050 /*---------------------------------------------------------------------------*/
22051       // use writePageZeroLab to make sure that same code as normal is run
22052       writePageZeroLab(signal, __LINE__);
22053       return;
22054     } else {
22055       jam();
22056 /*---------------------------------------------------------------------------*/
22057 /* WRITE PAGE ZERO IN FILE ZERO. LOG_FILE_REC WILL REFER TO THE LOG FILE WE  */
22058 /* HAVE JUST WRITTEN PAGE ZERO IN TO GET HOLD OF LOG_FILE_PTR FOR THIS       */
22059 /* RECORD QUICKLY. THIS IS NEEDED TO GET HOLD OF THE FILE_CHANGE_STATE.      */
22060 /* THE ONLY INFORMATION WE WANT TO CHANGE IS THE LAST FILE NUMBER IN THE     */
22061 /* FILE DESCRIPTOR. THIS IS USED AT SYSTEM RESTART TO FIND THE END OF THE    */
22062 /* LOG PART.                                                                 */
22063 /*---------------------------------------------------------------------------*/
22064       Uint32 currLogFile = logFilePtr.i;
22065       logFilePtr.i = logPartPtr.p->firstLogfile;
22066       ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
22067       logPagePtr.i = logFilePtr.p->logPageZero;
22068       ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
22069       logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_FILE_NO] = fileNo;
22070       writeSinglePage(signal, 0, ZPAGE_SIZE - 1, __LINE__);
22071       lfoPtr.p->logFileRec = currLogFile;
22072       lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_PAGE_ZERO;
22073       return;
22074     }//if
22075   }//if
22076 }//Dblqh::firstPageWriteLab()
22077 
lastWriteInFileLab(Signal * signal)22078 void Dblqh::lastWriteInFileLab(Signal* signal)
22079 {
22080   LogFileRecordPtr locLogFilePtr;
22081 /*---------------------------------------------------------------------------*/
22082 /* CHECK IF ANY GLOBAL CHECKPOINTS ARE COMPLETED DUE TO THIS COMPLETED DISK  */
22083 /* WRITE.                                                                    */
22084 /*---------------------------------------------------------------------------*/
22085   checkGcpCompleted(signal,
22086                     ((lfoPtr.p->lfoPageNo + lfoPtr.p->noPagesRw) - 1),
22087                     (ZPAGE_SIZE - 1));
22088   releaseLfoPages(signal);
22089   releaseLfo(signal);
22090 /*---------------------------------------------------------------------------*/
22091 /* IF THE FILE IS NOT IN USE OR THE NEXT FILE TO BE USED WE WILL CLOSE IT.   */
22092 /*---------------------------------------------------------------------------*/
22093   locLogFilePtr.i = logPartPtr.p->currentLogfile;
22094   ptrCheckGuard(locLogFilePtr, clogFileFileSize, logFileRecord);
22095   if (logFilePtr.i != locLogFilePtr.i) {
22096     if (logFilePtr.i != locLogFilePtr.p->nextLogFile) {
22097       if (logFilePtr.p->fileNo != 0) {
22098         jam();
22099 /*---------------------------------------------------------------------------*/
22100 /* THE FILE IS NOT FILE ZERO EITHER. WE WILL NOT CLOSE FILE ZERO SINCE WE    */
22101 /* USE IT TO KEEP TRACK OF THE CURRENT LOG FILE BY WRITING PAGE ZERO IN      */
22102 /* FILE ZERO.                                                                */
22103 /*---------------------------------------------------------------------------*/
22104 /* WE WILL CLOSE THE FILE.                                                   */
22105 /*---------------------------------------------------------------------------*/
22106         logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_WRITE_LOG;
22107         closeFile(signal, logFilePtr, __LINE__);
22108       }//if
22109     }//if
22110   }//if
22111 /*---------------------------------------------------------------------------*/
22112 /* IF A NEW FILE HAS BEEN OPENED WE SHALL ALWAYS ALSO WRITE TO PAGE O IN     */
22113 /* FILE 0. THE AIM IS TO MAKE RESTARTS EASIER BY SPECIFYING WHICH IS THE     */
22114 /* LAST FILE WHERE LOGGING HAS STARTED.                                      */
22115 /*---------------------------------------------------------------------------*/
22116 /* FIRST CHECK WHETHER THE NEXT FILE IS OPENED AND THEN                      */
22117 /* THE FIRST WRITE IN THE NEW FILE HAVE COMPLETED                            */
22118 /* THIS STATE INFORMATION IS IN THE NEW LOG FILE AND THUS WE HAVE TO MOVE    */
22119 /* THE LOG FILE POINTER TO THIS LOG FILE.                                    */
22120 /*---------------------------------------------------------------------------*/
22121   logFilePtr.i = logFilePtr.p->nextLogFile;
22122   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
22123 
22124   if (logFilePtr.p->fileChangeState == LogFileRecord::WAIT_FOR_OPEN_NEXT_FILE)
22125   {
22126     jam();
22127     logFilePtr.p->fileChangeState = LogFileRecord::LAST_FILEWRITE_WAITS;
22128     return;
22129   }
22130   else if (logFilePtr.p->fileChangeState == LogFileRecord::BOTH_WRITES_ONGOING) {
22131     jam();
22132 /*---------------------------------------------------------------------------*/
22133 /* THE FIRST WRITE WAS STILL ONGOING.                                        */
22134 /*---------------------------------------------------------------------------*/
22135     logFilePtr.p->fileChangeState = LogFileRecord::FIRST_WRITE_ONGOING;
22136     return;
22137   } else {
22138     ndbrequire(logFilePtr.p->fileChangeState == LogFileRecord::LAST_WRITE_ONGOING);
22139 /*---------------------------------------------------------------------------*/
22140 /* WRITE TO PAGE 0 IN IN FILE 0 NOW.                                         */
22141 /*---------------------------------------------------------------------------*/
22142     logFilePtr.p->fileChangeState = LogFileRecord::WRITE_PAGE_ZERO_ONGOING;
22143     Uint32 fileNo = logFilePtr.p->fileNo;
22144     if (fileNo == 0) {
22145       jam();
22146 /*---------------------------------------------------------------------------*/
22147 /* IF THE NEW FILE WAS 0 THEN WE HAVE ALREADY WRITTEN PAGE ZERO IN FILE 0.   */
22148 /*---------------------------------------------------------------------------*/
22149       // use writePageZeroLab to make sure that same code as normal is run
22150       writePageZeroLab(signal, __LINE__);
22151       return;
22152     } else {
22153       jam();
22154 /*---------------------------------------------------------------------------*/
22155 /* WRITE PAGE ZERO IN FILE ZERO. LOG_FILE_REC WILL REFER TO THE LOG FILE WE  */
22156 /* HAVE JUST WRITTEN PAGE ZERO IN TO GET HOLD OF LOG_FILE_PTR FOR THIS       */
22157 /* RECORD QUICKLY. THIS IS NEEDED TO GET HOLD OF THE FILE_CHANGE_STATE.      */
22158 /* THE ONLY INFORMATION WE WANT TO CHANGE IS THE LAST FILE NUMBER IN THE     */
22159 /* FILE DESCRIPTOR. THIS IS USED AT SYSTEM RESTART TO FIND THE END OF THE    */
22160 /* LOG PART.                                                                 */
22161 /*---------------------------------------------------------------------------*/
22162       Uint32 currLogFile = logFilePtr.i;
22163       logFilePtr.i = logPartPtr.p->firstLogfile;
22164       ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
22165       logPagePtr.i = logFilePtr.p->logPageZero;
22166       ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
22167       logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_FILE_NO] = fileNo;
22168       writeSinglePage(signal, 0, ZPAGE_SIZE - 1, __LINE__);
22169       lfoPtr.p->logFileRec = currLogFile;
22170       lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_PAGE_ZERO;
22171       return;
22172     }//if
22173   }//if
22174 }//Dblqh::lastWriteInFileLab()
22175 
writePageZeroLab(Signal * signal,Uint32 from)22176 void Dblqh::writePageZeroLab(Signal* signal, Uint32 from)
22177 {
22178   if ((logPartPtr.p->m_log_problems & LogPartRecord::P_FILE_CHANGE_PROBLEM)!= 0)
22179   {
22180     jam();
22181     update_log_problem(signal, logPartPtr,
22182                        LogPartRecord::P_FILE_CHANGE_PROBLEM,
22183                        /* clear */ false);
22184   }
22185 
22186   logFilePtr.p->fileChangeState = LogFileRecord::NOT_ONGOING;
22187 
22188 /*---------------------------------------------------------------------------*/
22189 /* IT COULD HAVE ARRIVED PAGE WRITES TO THE CURRENT FILE WHILE WE WERE       */
22190 /* WAITING FOR THIS DISK WRITE TO COMPLETE. THEY COULD NOT CHECK FOR         */
22191 /* COMPLETED GLOBAL CHECKPOINTS. THUS WE SHOULD DO THAT NOW INSTEAD.         */
22192 /*---------------------------------------------------------------------------*/
22193   bool res = checkGcpCompleted(signal,
22194                                logFilePtr.p->lastPageWritten,
22195                                logFilePtr.p->lastWordWritten);
22196   if (res && false)
22197   {
22198     gcpPtr.i = ccurrentGcprec;
22199     ptrCheckGuard(gcpPtr, cgcprecFileSize, gcpRecord);
22200 
22201     infoEvent("Completing GCP %u in writePageZeroLab from %u",
22202               gcpPtr.p->gcpId, from);
22203   }
22204   return;
22205 }//Dblqh::writePageZeroLab()
22206 
22207 /* ######################################################################### */
22208 /*       INITIAL START MODULE                                                */
22209 /*       THIS MODULE IS A SUB-MODULE OF THE FILE SYSTEM HANDLING.            */
22210 /*                                                                           */
22211 /*THIS MODULE INITIALISES ALL THE LOG FILES THAT ARE NEEDED AT A SYSTEM      */
22212 /*RESTART AND WHICH ARE USED DURING NORMAL OPERATIONS. IT CREATES THE FILES  */
22213 /*AND SETS A PROPER SIZE OF THEM AND INITIALISES THE FIRST PAGE IN EACH FILE */
22214 /* ######################################################################### */
openFileInitLab(Signal * signal)22215 void Dblqh::openFileInitLab(Signal* signal)
22216 {
22217   logFilePtr.p->logFileStatus = LogFileRecord::OPEN_INIT;
22218   seizeLogpage(signal);
22219   if (m_use_om_init == 0)
22220   {
22221     jam();
22222     initLogpage(signal);
22223     writeSinglePage(signal, (clogFileSize * ZPAGES_IN_MBYTE) - 1,
22224                     ZPAGE_SIZE - 1, __LINE__, false);
22225     lfoPtr.p->lfoState = LogFileOperationRecord::INIT_WRITE_AT_END;
22226   }
22227   else
22228   {
22229     jam();
22230     seizeLfo(signal);
22231     initWriteEndLab(signal);
22232   }
22233   return;
22234 }//Dblqh::openFileInitLab()
22235 
initWriteEndLab(Signal * signal)22236 void Dblqh::initWriteEndLab(Signal* signal)
22237 {
22238   releaseLfo(signal);
22239   initLogpage(signal);
22240   if (logFilePtr.p->fileNo == 0) {
22241     jam();
22242 /*---------------------------------------------------------------------------*/
22243 /* PAGE ZERO IN FILE ZERO MUST SET LOG LAP TO ONE SINCE IT HAS STARTED       */
22244 /* WRITING TO THE LOG, ALSO GLOBAL CHECKPOINTS ARE SET TO ZERO.              */
22245 /* Set number of log parts used to ensure we use correct number of log parts */
22246 /* at system restart. Was previously hardcoded to 4.                         */
22247 /*---------------------------------------------------------------------------*/
22248     logPagePtr.p->logPageWord[ZPOS_NO_LOG_PARTS]= globalData.ndbLogParts;
22249     logPagePtr.p->logPageWord[ZPOS_LOG_LAP] = 1;
22250     logPagePtr.p->logPageWord[ZPOS_MAX_GCI_STARTED] = 0;
22251     logPagePtr.p->logPageWord[ZPOS_MAX_GCI_COMPLETED] = 0;
22252     logFilePtr.p->logMaxGciStarted[0] = 0;
22253     logFilePtr.p->logMaxGciCompleted[0] = 0;
22254   }//if
22255 /*---------------------------------------------------------------------------*/
22256 /* REUSE CODE FOR INITIALISATION OF FIRST PAGE IN ALL LOG FILES.             */
22257 /*---------------------------------------------------------------------------*/
22258   writeFileHeaderOpen(signal, ZINIT);
22259   return;
22260 }//Dblqh::initWriteEndLab()
22261 
initFirstPageLab(Signal * signal)22262 void Dblqh::initFirstPageLab(Signal* signal)
22263 {
22264   releaseLfo(signal);
22265   if (logFilePtr.p->fileNo == 0) {
22266     jam();
22267 /*---------------------------------------------------------------------------*/
22268 /* IN FILE ZERO WE WILL INSERT A PAGE ONE WHERE WE WILL INSERT A COMPLETED   */
22269 /* GCI RECORD FOR GCI = 0.                                                   */
22270 /*---------------------------------------------------------------------------*/
22271     initLogpage(signal);
22272     logPagePtr.p->logPageWord[ZPOS_LOG_LAP] = 1;
22273     logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE] = ZCOMPLETED_GCI_TYPE;
22274     logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + 1] = 1;
22275     writeSinglePage(signal, 1, ZPAGE_SIZE - 1, __LINE__, false);
22276     lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_GCI_ZERO;
22277     return;
22278   }//if
22279   logFilePtr.p->currentMbyte = 1;
22280   writeInitMbyte(signal);
22281   return;
22282 }//Dblqh::initFirstPageLab()
22283 
writeGciZeroLab(Signal * signal)22284 void Dblqh::writeGciZeroLab(Signal* signal)
22285 {
22286   releaseLfo(signal);
22287   logFilePtr.p->currentMbyte = 1;
22288   writeInitMbyte(signal);
22289   return;
22290 }//Dblqh::writeGciZeroLab()
22291 
writeInitMbyteLab(Signal * signal)22292 void Dblqh::writeInitMbyteLab(Signal* signal)
22293 {
22294   releaseLfo(signal);
22295   logFilePtr.p->currentMbyte = logFilePtr.p->currentMbyte + 1;
22296   if (logFilePtr.p->currentMbyte == clogFileSize) {
22297     jam();
22298     releaseLogpage(signal);
22299     logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_INIT;
22300     closeFile(signal, logFilePtr, __LINE__);
22301     return;
22302   }//if
22303   writeInitMbyte(signal);
22304   return;
22305 }//Dblqh::writeInitMbyteLab()
22306 
closingInitLab(Signal * signal)22307 void Dblqh::closingInitLab(Signal* signal)
22308 {
22309   logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
22310   logPartPtr.i = logFilePtr.p->logPartRec;
22311   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
22312   if (logFilePtr.p->nextLogFile == logPartPtr.p->firstLogfile) {
22313     jam();
22314     checkInitCompletedLab(signal);
22315     return;
22316   } else {
22317     jam();
22318     logFilePtr.i = logFilePtr.p->nextLogFile;
22319     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
22320     openLogfileInit(signal);
22321   }//if
22322   return;
22323 }//Dblqh::closingInitLab()
22324 
checkInitCompletedLab(Signal * signal)22325 void Dblqh::checkInitCompletedLab(Signal* signal)
22326 {
22327   logPartPtr.p->logPartState = LogPartRecord::SR_FIRST_PHASE_COMPLETED;
22328   g_eventLogger->info("LDM(%u): Completed REDO log initialisation of"
22329                       " logPart = %u",
22330                       instance(),
22331                       logPartPtr.i);
22332   csrExecUndoLogState = EULS_COMPLETED;
22333 /*---------------------------------------------------------------------------*/
22334 /* WE HAVE NOW INITIALISED ALL FILES IN THIS LOG PART. WE CAN NOW SET THE    */
22335 /* THE LOG LAP TO ONE SINCE WE WILL START WITH LOG LAP ONE. LOG LAP = ZERO   */
22336 /* MEANS THIS PART OF THE LOG IS NOT WRITTEN YET.                            */
22337 /*---------------------------------------------------------------------------*/
22338   logPartPtr.p->logLap = 1;
22339 
22340   if (m_use_om_init && ++logPartPtr.i != clogPartFileSize)
22341   {
22342     jam();
22343     ptrAss(logPartPtr, logPartRecord);
22344     logFilePtr.i = logPartPtr.p->firstLogfile;
22345     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
22346     openLogfileInit(signal);
22347     return;
22348   }
22349 
22350   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++)
22351   {
22352     jam();
22353     ptrAss(logPartPtr, logPartRecord);
22354     if (logPartPtr.p->logPartState != LogPartRecord::SR_FIRST_PHASE_COMPLETED)
22355     {
22356       jam();
22357 /*---------------------------------------------------------------------------*/
22358 /* THIS PART HAS STILL NOT COMPLETED. WAIT FOR THIS TO OCCUR.                */
22359 /*---------------------------------------------------------------------------*/
22360       return;
22361     }//if
22362   }
22363 
22364 #if defined(USE_INIT_GLOBAL_VARIABLES)
22365   enable_global_variables();
22366 #endif
22367   g_eventLogger->info("LDM(%u): Completed REDO initialisation",
22368                       instance());
22369   logfileInitCompleteReport(signal);
22370   sendNdbSttorryLab(signal);
22371 }
22372 
22373 /* ========================================================================= */
22374 /* =======       INITIATE LOG FILE OPERATION RECORD WHEN ALLOCATED   ======= */
22375 /*                                                                           */
22376 /* ========================================================================= */
initLfo(Signal * signal)22377 void Dblqh::initLfo(Signal* signal)
22378 {
22379   lfoPtr.p->firstLfoPage = RNIL;
22380   lfoPtr.p->lfoState = LogFileOperationRecord::IDLE;
22381   lfoPtr.p->logFileRec = logFilePtr.i;
22382   lfoPtr.p->noPagesRw = 0;
22383   lfoPtr.p->lfoPageNo = ZNIL;
22384 }//Dblqh::initLfo()
22385 
22386 /* ========================================================================= */
22387 /* =======              INITIATE LOG FILE WHEN ALLOCATED             ======= */
22388 /*                                                                           */
22389 /*       INPUT:  TFILE_NO        NUMBER OF THE FILE INITIATED                */
22390 /*               LOG_PART_PTR    NUMBER OF LOG PART                          */
22391 /*       SUBROUTINE SHORT NAME = IL                                          */
22392 /* ========================================================================= */
initLogfile(Signal * signal,Uint32 fileNo)22393 void Dblqh::initLogfile(Signal* signal, Uint32 fileNo)
22394 {
22395   UintR tilTmp;
22396   UintR tilIndex;
22397 
22398   logFilePtr.p->currentFilepage = 0;
22399   logFilePtr.p->currentLogpage = RNIL;
22400   logFilePtr.p->fileName[0] = (UintR)-1;
22401   logFilePtr.p->fileName[1] = (UintR)-1;	/* = H'FFFFFFFF = -1 */
22402   logFilePtr.p->fileName[2] = fileNo;	        /* Sfile_no */
22403   tilTmp = 1;	                        /* VERSION 1 OF FILE NAME */
22404   tilTmp = (tilTmp << 8) + 1;	    /* FRAGMENT LOG => .FRAGLOG AS EXTENSION */
22405   tilTmp = (tilTmp << 8) + (8 + logPartPtr.p->logPartNo); /* DIRECTORY = D(8+Part)/DBLQH */
22406   tilTmp = (tilTmp << 8) + 255;	              /* IGNORE Pxx PART OF FILE NAME */
22407   logFilePtr.p->fileName[3] = tilTmp;
22408 /* ========================================================================= */
22409 /*       FILE NAME BECOMES /D2/DBLQH/Tpart_no/Sfile_no.FRAGLOG               */
22410 /* ========================================================================= */
22411   logFilePtr.p->fileNo = fileNo;
22412   logFilePtr.p->filePosition = 0;
22413   logFilePtr.p->firstLfo = RNIL;
22414   logFilePtr.p->lastLfo = RNIL;
22415   logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
22416   logFilePtr.p->logPartRec = logPartPtr.i;
22417   logFilePtr.p->noLogpagesInBuffer = 0;
22418   logFilePtr.p->firstFilledPage = RNIL;
22419   logFilePtr.p->lastFilledPage = RNIL;
22420   logFilePtr.p->lastPageWritten = 0;
22421   logFilePtr.p->logPageZero = RNIL;
22422   logFilePtr.p->currentMbyte = 0;
22423   for (tilIndex = 0; tilIndex < clogFileSize; tilIndex++) {
22424     logFilePtr.p->logMaxGciCompleted[tilIndex] = (UintR)-1;
22425     logFilePtr.p->logMaxGciStarted[tilIndex] = (UintR)-1;
22426     logFilePtr.p->logLastPrepRef[tilIndex] = 0;
22427   }//for
22428 }//Dblqh::initLogfile()
22429 
22430 /* ========================================================================= */
22431 /* =======              INITIATE LOG PAGE WHEN ALLOCATED             ======= */
22432 /*                                                                           */
22433 /* ========================================================================= */
initLogpage(Signal * signal)22434 void Dblqh::initLogpage(Signal* signal)
22435 {
22436   TcConnectionrecPtr ilpTcConnectptr;
22437 
22438   /* Ensure all non-used header words are zero */
22439   bzero(logPagePtr.p, sizeof(Uint32) * ZPAGE_HEADER_SIZE);
22440   logPagePtr.p->logPageWord[ZPOS_LOG_LAP] = logPartPtr.p->logLap;
22441   logPagePtr.p->logPageWord[ZPOS_MAX_GCI_COMPLETED] =
22442         logPartPtr.p->logPartNewestCompletedGCI;
22443   logPagePtr.p->logPageWord[ZPOS_MAX_GCI_STARTED] = cnewestGci;
22444   logPagePtr.p->logPageWord[ZPOS_VERSION] = NDB_VERSION;
22445   logPagePtr.p->logPageWord[ZPOS_NO_LOG_FILES] = logPartPtr.p->noLogFiles;
22446   logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = ZPAGE_HEADER_SIZE;
22447   logPagePtr.p->logPageWord[ZPOS_NO_LOG_PARTS]= globalData.ndbLogParts;
22448   ilpTcConnectptr.i = logPartPtr.p->firstLogTcrec;
22449   if (ilpTcConnectptr.i != RNIL) {
22450     jam();
22451     ndbrequire(tcConnect_pool.getValidPtr(ilpTcConnectptr));
22452     logPagePtr.p->logPageWord[ZLAST_LOG_PREP_REF] =
22453       (ilpTcConnectptr.p->logStartFileNo << 16) +
22454       (ilpTcConnectptr.p->logStartPageNo >> ZTWOLOG_NO_PAGES_IN_MBYTE);
22455   } else {
22456     jam();
22457     logPagePtr.p->logPageWord[ZLAST_LOG_PREP_REF] =
22458       (logFilePtr.p->fileNo << 16) +
22459       (logFilePtr.p->currentFilepage >> ZTWOLOG_NO_PAGES_IN_MBYTE);
22460   }//if
22461 }//Dblqh::initLogpage()
22462 
22463 /* ------------------------------------------------------------------------- */
22464 /* -------               OPEN LOG FILE FOR READ AND WRITE            ------- */
22465 /*                                                                           */
22466 /*       SUBROUTINE SHORT NAME = OFR                                         */
22467 /* ------------------------------------------------------------------------- */
openFileRw(Signal * signal,LogFileRecordPtr olfLogFilePtr,bool writeBuffer)22468 void Dblqh::openFileRw(Signal* signal,
22469                        LogFileRecordPtr olfLogFilePtr,
22470                        bool writeBuffer)
22471 {
22472   FsOpenReq* req = (FsOpenReq*)signal->getDataPtrSend();
22473   signal->theData[0] = cownref;
22474   signal->theData[1] = olfLogFilePtr.i;
22475   signal->theData[2] = olfLogFilePtr.p->fileName[0];
22476   signal->theData[3] = olfLogFilePtr.p->fileName[1];
22477   signal->theData[4] = olfLogFilePtr.p->fileName[2];
22478   signal->theData[5] = olfLogFilePtr.p->fileName[3];
22479   signal->theData[6] = FsOpenReq::OM_READWRITE |
22480                        FsOpenReq::OM_AUTOSYNC |
22481                        FsOpenReq::OM_CHECK_SIZE;
22482   if (c_o_direct)
22483   {
22484     jam();
22485     signal->theData[6] |= FsOpenReq::OM_DIRECT;
22486     if (c_o_direct_sync_flag)
22487     {
22488       jam();
22489       signal->theData[6] |= FsOpenReq::OM_DIRECT_SYNC;
22490     }
22491   }
22492   if (writeBuffer)
22493   {
22494     signal->theData[6] |= FsOpenReq::OM_WRITE_BUFFER;
22495   }
22496 
22497   req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord);
22498   Uint64 sz = clogFileSize;
22499   sz *= 1024; sz *= 1024;
22500   req->file_size_hi = (Uint32)(sz >> 32);
22501   req->file_size_lo = (Uint32)(sz & 0xFFFFFFFF);
22502   sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
22503 }//Dblqh::openFileRw()
22504 
22505 /* ------------------------------------------------------------------------- */
22506 /* -------               OPEN LOG FILE DURING INITIAL START          ------- */
22507 /*                                                                           */
22508 /*       SUBROUTINE SHORT NAME = OLI                                         */
22509 /* ------------------------------------------------------------------------- */
openLogfileInit(Signal * signal)22510 void Dblqh::openLogfileInit(Signal* signal)
22511 {
22512   logFilePtr.p->logFileStatus = LogFileRecord::OPENING_INIT;
22513   FsOpenReq* req = (FsOpenReq*)signal->getDataPtrSend();
22514   signal->theData[0] = cownref;
22515   signal->theData[1] = logFilePtr.i;
22516   signal->theData[2] = logFilePtr.p->fileName[0];
22517   signal->theData[3] = logFilePtr.p->fileName[1];
22518   signal->theData[4] = logFilePtr.p->fileName[2];
22519   signal->theData[5] = logFilePtr.p->fileName[3];
22520   signal->theData[6] = FsOpenReq::OM_READWRITE |
22521                        FsOpenReq::OM_TRUNCATE |
22522                        FsOpenReq::OM_CREATE |
22523                        FsOpenReq::OM_AUTOSYNC |
22524                        FsOpenReq::OM_WRITE_BUFFER;
22525   if (c_o_direct)
22526   {
22527     jam();
22528     signal->theData[6] |= FsOpenReq::OM_DIRECT;
22529     if (c_o_direct_sync_flag)
22530     {
22531       jam();
22532       signal->theData[6] |= FsOpenReq::OM_DIRECT_SYNC;
22533     }
22534   }
22535   Uint64 sz = Uint64(clogFileSize) * 1024 * 1024;
22536   req->file_size_hi = Uint32(sz >> 32);
22537   req->file_size_lo = Uint32(sz);
22538   req->page_size = File_formats::NDB_PAGE_SIZE;
22539   if (m_use_om_init)
22540   {
22541     jam();
22542     signal->theData[6] |= FsOpenReq::OM_INIT;
22543   }
22544 
22545   req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord);
22546   sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
22547 }//Dblqh::openLogfileInit()
22548 
22549 void
execFSWRITEREQ(Signal * signal)22550 Dblqh::execFSWRITEREQ(Signal* signal)
22551 {
22552   /**
22553    * This is currently run in other thread -> no jam
22554    *   and no global variables
22555    *
22556    * This method is called from NDB file system while initialising a REDO log
22557    * file, so we need to ensure that we don't touch any block variables other
22558    * than to read stable variables. This is only called during initial
22559    * restart. The pages are allocated by NDBFS from DataMemory, so these can
22560    * be written to safely since they are owned by the file system thread.
22561    */
22562   Ptr<GlobalPage> page_ptr;
22563   FsReadWriteReq* req= (FsReadWriteReq*)signal->getDataPtr();
22564   m_shared_page_pool.getPtr(page_ptr, req->data.pageData[0]);
22565 
22566   LogFileRecordPtr currLogFilePtr;
22567   currLogFilePtr.i = req->userPointer;
22568   ptrCheckGuard(currLogFilePtr, clogFileFileSize, logFileRecord);
22569 
22570   LogPartRecordPtr currLogPartPtr;
22571   currLogPartPtr.i = currLogFilePtr.p->logPartRec;
22572   ptrCheckGuard(currLogPartPtr, clogPartFileSize, logPartRecord);
22573 
22574   Uint32 page_no = req->varIndex;
22575   LogPageRecordPtr currLogPagePtr;
22576   currLogPagePtr.p = (LogPageRecord*)page_ptr.p;
22577 
22578   bzero(page_ptr.p, sizeof(LogPageRecord));
22579   if (page_no == 0)
22580   {
22581     // keep writing these afterwards
22582   }
22583   else if (((page_no % ZPAGES_IN_MBYTE) == 0) ||
22584            (page_no == ((clogFileSize * ZPAGES_IN_MBYTE) - 1)))
22585   {
22586     currLogPagePtr.p->logPageWord[ZPOS_LOG_LAP] = currLogPartPtr.p->logLap;
22587     currLogPagePtr.p->logPageWord[ZPOS_MAX_GCI_COMPLETED] =
22588       currLogPartPtr.p->logPartNewestCompletedGCI;
22589     currLogPagePtr.p->logPageWord[ZPOS_MAX_GCI_STARTED] = cnewestGci;
22590     currLogPagePtr.p->logPageWord[ZPOS_VERSION] = NDB_VERSION;
22591     currLogPagePtr.p->logPageWord[ZPOS_NO_LOG_FILES] =
22592       currLogPartPtr.p->noLogFiles;
22593     currLogPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = ZPAGE_HEADER_SIZE;
22594     currLogPagePtr.p->logPageWord[ZLAST_LOG_PREP_REF] =
22595       (currLogFilePtr.p->fileNo << 16) +
22596       (currLogFilePtr.p->currentFilepage >> ZTWOLOG_NO_PAGES_IN_MBYTE);
22597 
22598     currLogPagePtr.p->logPageWord[ZNEXT_PAGE] = RNIL;
22599     currLogPagePtr.p->logPageWord[ZPOS_CHECKSUM] =
22600       calcPageCheckSum(currLogPagePtr);
22601   }
22602   else if (0)
22603   {
22604     currLogPagePtr.p->logPageWord[ZNEXT_PAGE] = RNIL;
22605     currLogPagePtr.p->logPageWord[ZPOS_CHECKSUM] =
22606       calcPageCheckSum(currLogPagePtr);
22607   }
22608 }
22609 
22610 /* OPEN FOR READ/WRITE, DO CREATE AND DO TRUNCATE FILE */
22611 /* ------------------------------------------------------------------------- */
22612 /* -------               OPEN NEXT LOG FILE                          ------- */
22613 /*                                                                           */
22614 /*       SUBROUTINE SHORT NAME = ONL                                         */
22615 /* ------------------------------------------------------------------------- */
openNextLogfile(Signal * signal)22616 void Dblqh::openNextLogfile(Signal* signal)
22617 {
22618   LogFileRecordPtr onlLogFilePtr;
22619 
22620   if (logPartPtr.p->noLogFiles > 2) {
22621     jam();
22622 /* -------------------------------------------------- */
22623 /*       IF ONLY 1 OR 2 LOG FILES EXIST THEN THEY ARE */
22624 /*       ALWAYS OPEN AND THUS IT IS NOT NECESSARY TO  */
22625 /*       OPEN THEM NOW.                               */
22626 /* -------------------------------------------------- */
22627     onlLogFilePtr.i = logFilePtr.p->nextLogFile;
22628     ptrCheckGuard(onlLogFilePtr, clogFileFileSize, logFileRecord);
22629 
22630 #ifdef ERROR_INSERT
22631     if (delayOpenFilePtrI == 0 && onlLogFilePtr.p->fileNo > 3 &&
22632         ERROR_INSERTED_CLEAR(5090))
22633     {
22634       /* Instruct execFSOPENCONF to delay the execution of the
22635        * signal for fileNo>3 to simulate a delay in opening it.
22636        * (Choice of '>3': File 0 is held open. Let files 1-3
22637        * being filled and opened normally. The next file belonging
22638        * to the log part being filled by the test will be delayed).
22639        */
22640       delayOpenFilePtrI = logFilePtr.p->nextLogFile;
22641     }
22642 #endif
22643 
22644     if (onlLogFilePtr.p->logFileStatus != LogFileRecord::CLOSED) {
22645       ndbrequire(onlLogFilePtr.p->fileNo == 0);
22646       return;
22647     }//if
22648     onlLogFilePtr.p->logFileStatus = LogFileRecord::OPENING_WRITE_LOG;
22649     FsOpenReq* req = (FsOpenReq*)signal->getDataPtrSend();
22650     signal->theData[0] = cownref;
22651     signal->theData[1] = onlLogFilePtr.i;
22652     signal->theData[2] = onlLogFilePtr.p->fileName[0];
22653     signal->theData[3] = onlLogFilePtr.p->fileName[1];
22654     signal->theData[4] = onlLogFilePtr.p->fileName[2];
22655     signal->theData[5] = onlLogFilePtr.p->fileName[3];
22656     signal->theData[6] = FsOpenReq::OM_READWRITE |
22657                          FsOpenReq::OM_AUTOSYNC |
22658                          FsOpenReq::OM_CHECK_SIZE |
22659                          FsOpenReq::OM_WRITE_BUFFER;
22660     if (c_o_direct)
22661     {
22662       jam();
22663       signal->theData[6] |= FsOpenReq::OM_DIRECT;
22664       if (c_o_direct_sync_flag)
22665       {
22666         jam();
22667         signal->theData[6] |= FsOpenReq::OM_DIRECT_SYNC;
22668       }
22669     }
22670     req->auto_sync_size = MAX_REDO_PAGES_WITHOUT_SYNCH * sizeof(LogPageRecord);
22671     Uint64 sz = clogFileSize;
22672     sz *= 1024; sz *= 1024;
22673     req->file_size_hi = (Uint32)(sz >> 32);
22674     req->file_size_lo = (Uint32)(sz & 0xFFFFFFFF);
22675     sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
22676   }//if
22677 }//Dblqh::openNextLogfile()
22678 
22679         /* OPEN FOR READ/WRITE, DON'T CREATE AND DON'T TRUNCATE FILE */
22680 /* ------------------------------------------------------------------------- */
22681 /* -------                       RELEASE LFO RECORD                  ------- */
22682 /*                                                                           */
22683 /* ------------------------------------------------------------------------- */
releaseLfo(Signal * signal)22684 void Dblqh::releaseLfo(Signal* signal)
22685 {
22686 #ifdef VM_TRACE
22687   // Check that lfo record isn't already in free list
22688   LogFileOperationRecordPtr TlfoPtr;
22689   TlfoPtr.i = cfirstfreeLfo;
22690   while (TlfoPtr.i != RNIL){
22691     ptrCheckGuard(TlfoPtr, clfoFileSize, logFileOperationRecord);
22692     ndbrequire(TlfoPtr.i != lfoPtr.i);
22693     TlfoPtr.i = TlfoPtr.p->nextLfo;
22694   }
22695 #endif
22696   lfoPtr.p->nextLfo = cfirstfreeLfo;
22697   lfoPtr.p->lfoTimer = 0;
22698   cfirstfreeLfo = lfoPtr.i;
22699   lfoPtr.p->lfoState = LogFileOperationRecord::IDLE;
22700 }//Dblqh::releaseLfo()
22701 
22702 /* ------------------------------------------------------------------------- */
22703 /* ------- RELEASE ALL LOG PAGES CONNECTED TO A LFO RECORD           ------- */
22704 /*                                                                           */
22705 /*       SUBROUTINE SHORT NAME = RLP                                         */
22706 /* ------------------------------------------------------------------------- */
releaseLfoPages(Signal * signal)22707 void Dblqh::releaseLfoPages(Signal* signal)
22708 {
22709   logPagePtr.i = lfoPtr.p->firstLfoPage;
22710   while (logPagePtr.i != RNIL)
22711   {
22712     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
22713     Uint32 tmp = logPagePtr.p->logPageWord[ZNEXT_PAGE];
22714     releaseLogpage(signal);
22715     logPagePtr.i = tmp;
22716   }
22717   lfoPtr.p->firstLfoPage = RNIL;
22718 }//Dblqh::releaseLfoPages()
22719 
22720 /* ------------------------------------------------------------------------- */
22721 /* -------                       RELEASE LOG PAGE                    ------- */
22722 /*                                                                           */
22723 /* ------------------------------------------------------------------------- */
releaseLogpage(Signal * signal)22724 void Dblqh::releaseLogpage(Signal* signal)
22725 {
22726 #ifdef VM_TRACE
22727   // Check that log page isn't already in free list
22728   ndbrequire(logPagePtr.p->logPageWord[ZPOS_IN_FREE_LIST] == 0);
22729 #endif
22730 
22731   cnoOfLogPages++;
22732   logPagePtr.p->logPageWord[ZNEXT_PAGE] = cfirstfreeLogPage;
22733   logPagePtr.p->logPageWord[ZPOS_IN_WRITING]= 0;
22734   logPagePtr.p->logPageWord[ZPOS_IN_FREE_LIST]= 1;
22735   cfirstfreeLogPage = logPagePtr.i;
22736 }//Dblqh::releaseLogpage()
22737 
22738 /* ------------------------------------------------------------------------- */
22739 /* -------       SEIZE LFO RECORD                                    ------- */
22740 /*                                                                           */
22741 /* ------------------------------------------------------------------------- */
seizeLfo(Signal * signal)22742 void Dblqh::seizeLfo(Signal* signal)
22743 {
22744   lfoPtr.i = cfirstfreeLfo;
22745   ptrCheckGuard(lfoPtr, clfoFileSize, logFileOperationRecord);
22746   cfirstfreeLfo = lfoPtr.p->nextLfo;
22747   lfoPtr.p->nextLfo = RNIL;
22748   lfoPtr.p->lfoTimer = cLqhTimeOutCount;
22749 }//Dblqh::seizeLfo()
22750 
22751 /* ------------------------------------------------------------------------- */
22752 /* -------       SEIZE LOG FILE RECORD                               ------- */
22753 /*                                                                           */
22754 /* ------------------------------------------------------------------------- */
seizeLogfile(Signal * signal)22755 void Dblqh::seizeLogfile(Signal* signal)
22756 {
22757   logFilePtr.i = cfirstfreeLogFile;
22758   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
22759 /* ------------------------------------------------------------------------- */
22760 /*IF LIST IS EMPTY THEN A SYSTEM CRASH IS INVOKED SINCE LOG_FILE_PTR = RNIL  */
22761 /* ------------------------------------------------------------------------- */
22762   cfirstfreeLogFile = logFilePtr.p->nextLogFile;
22763   logFilePtr.p->nextLogFile = RNIL;
22764 }//Dblqh::seizeLogfile()
22765 
22766 /* ------------------------------------------------------------------------- */
22767 /* -------       SEIZE LOG PAGE RECORD                               ------- */
22768 /*                                                                           */
22769 /* ------------------------------------------------------------------------- */
seizeLogpage(Signal * signal)22770 void Dblqh::seizeLogpage(Signal* signal)
22771 {
22772   cnoOfLogPages--;
22773   logPagePtr.i = cfirstfreeLogPage;
22774   ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
22775 /* ------------------------------------------------------------------------- */
22776 /*IF LIST IS EMPTY THEN A SYSTEM CRASH IS INVOKED SINCE LOG_PAGE_PTR = RNIL  */
22777 /* ------------------------------------------------------------------------- */
22778   cfirstfreeLogPage = logPagePtr.p->logPageWord[ZNEXT_PAGE];
22779 #ifdef VM_TRACE
22780   bzero(logPagePtr.p, sizeof(LogPageRecord));
22781 #endif
22782   logPagePtr.p->logPageWord[ZNEXT_PAGE] = RNIL;
22783   logPagePtr.p->logPageWord[ZPOS_IN_FREE_LIST] = 0;
22784   /**
22785    * During an initial start of a data node with Diskless set to
22786    * 1 we need to initialise this variable to 0. Normally the
22787    * log page is initialised when read from file system. The
22788    * code above that zeroes the entire page in debug builds
22789    * shows that it is safe to perform this initialisation.
22790    */
22791   logPagePtr.p->logPageWord[ZPOS_LOG_LAP] = 0;
22792 }//Dblqh::seizeLogpage()
22793 
22794 /* ------------------------------------------------------------------------- */
22795 /* -------               WRITE FILE DESCRIPTOR INFORMATION           ------- */
22796 /*                                                                           */
22797 /*       SUBROUTINE SHORT NAME: WFD                                          */
22798 // Pointer handling:
22799 // logFilePtr in
22800 // logPartPtr in
22801 /* ------------------------------------------------------------------------- */
writeFileDescriptor(Signal * signal)22802 void Dblqh::writeFileDescriptor(Signal* signal)
22803 {
22804   TcConnectionrecPtr wfdTcConnectptr;
22805   UintR twfdFileNo;
22806   UintR twfdMbyte;
22807 
22808 /* -------------------------------------------------- */
22809 /*       START BY WRITING TO LOG FILE RECORD          */
22810 /* -------------------------------------------------- */
22811   arrGuard(logFilePtr.p->currentMbyte, clogFileSize);
22812   if (DEBUG_REDO)
22813   {
22814     printf("part: %u file: %u setting logMaxGciCompleted[%u] = %u logMaxGciStarted[%u]: %u lastPrepRef[%u]: ",
22815            logPartPtr.p->logPartNo,
22816            logFilePtr.p->fileNo,
22817            logFilePtr.p->currentMbyte,
22818            logPartPtr.p->logPartNewestCompletedGCI,
22819            logFilePtr.p->currentMbyte,
22820            cnewestGci,
22821            logFilePtr.p->currentMbyte);
22822     if (logPartPtr.p->firstLogTcrec == RNIL)
22823     {
22824       ndbout_c("file: %u mb: %u (RNIL)",
22825                logFilePtr.p->fileNo,
22826                logFilePtr.p->currentMbyte);
22827     }
22828     else
22829     {
22830       wfdTcConnectptr.i = logPartPtr.p->firstLogTcrec;
22831       ndbrequire(tcConnect_pool.getValidPtr(wfdTcConnectptr));
22832       ndbout_c("file: %u mb: %u",
22833                wfdTcConnectptr.p->logStartFileNo,
22834                wfdTcConnectptr.p->logStartPageNo >> ZTWOLOG_NO_PAGES_IN_MBYTE);
22835     }
22836   }
22837   logFilePtr.p->logMaxGciCompleted[logFilePtr.p->currentMbyte] =
22838     logPartPtr.p->logPartNewestCompletedGCI;
22839   logFilePtr.p->logMaxGciStarted[logFilePtr.p->currentMbyte] = cnewestGci;
22840   wfdTcConnectptr.i = logPartPtr.p->firstLogTcrec;
22841   if (wfdTcConnectptr.i != RNIL) {
22842     jam();
22843     ndbrequire(tcConnect_pool.getValidPtr(wfdTcConnectptr));
22844     twfdFileNo = wfdTcConnectptr.p->logStartFileNo;
22845     twfdMbyte = wfdTcConnectptr.p->logStartPageNo >> ZTWOLOG_NO_PAGES_IN_MBYTE;
22846     logFilePtr.p->logLastPrepRef[logFilePtr.p->currentMbyte] =
22847       (twfdFileNo << 16) + twfdMbyte;
22848   } else {
22849     jam();
22850     logFilePtr.p->logLastPrepRef[logFilePtr.p->currentMbyte] =
22851       (logFilePtr.p->fileNo << 16) + logFilePtr.p->currentMbyte;
22852   }//if
22853 }//Dblqh::writeFileDescriptor()
22854 
22855 /* ------------------------------------------------------------------------- */
22856 /* -------               WRITE THE HEADER PAGE OF A NEW FILE         ------- */
22857 /*                                                                           */
22858 /*       SUBROUTINE SHORT NAME:  WMO                                         */
22859 /* ------------------------------------------------------------------------- */
writeFileHeaderOpen(Signal * signal,Uint32 wmoType)22860 void Dblqh::writeFileHeaderOpen(Signal* signal, Uint32 wmoType)
22861 {
22862   UintR twmoNoLogDescriptors;
22863 
22864 /* -------------------------------------------------- */
22865 /*       WRITE HEADER INFORMATION IN THE NEW FILE.    */
22866 /* -------------------------------------------------- */
22867   logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_LOG_TYPE] = ZFD_TYPE;
22868   logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_FILE_NO] =
22869     logFilePtr.p->fileNo;
22870   /*
22871    * When writing a file header on open, we write cmaxLogFilesInPageZero,
22872    * though the entries for the first file (this file), will be invalid,
22873    * as we do not know e.g. which GCIs will be included by log records
22874    * in the MBs in this file.  On the first lap these will be initial values
22875    * on subsequent laps, they will be values from the previous lap.
22876    * We take care when reading these values back, not to use the values for
22877    * the current file.
22878    */
22879   if (logPartPtr.p->noLogFiles > cmaxLogFilesInPageZero) {
22880     jam();
22881     twmoNoLogDescriptors = cmaxLogFilesInPageZero;
22882   } else {
22883     jam();
22884     twmoNoLogDescriptors = logPartPtr.p->noLogFiles;
22885   }//if
22886   logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_NO_FD] =
22887     twmoNoLogDescriptors;
22888 
22889   {
22890     Uint32 pos = ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE;
22891     LogFileRecordPtr filePtr = logFilePtr;
22892     for (Uint32 fd = 0; fd < twmoNoLogDescriptors; fd++)
22893     {
22894       jam();
22895       ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord);
22896       for (Uint32 mb = 0; mb < clogFileSize; mb ++)
22897       {
22898         jam();
22899         Uint32 pos0 = pos + fd * (ZFD_MBYTE_SIZE * clogFileSize) + mb;
22900         Uint32 pos1 = pos0 + clogFileSize;
22901         Uint32 pos2 = pos1 + clogFileSize;
22902         arrGuard(pos0, ZPAGE_SIZE);
22903         arrGuard(pos1, ZPAGE_SIZE);
22904         arrGuard(pos2, ZPAGE_SIZE);
22905         logPagePtr.p->logPageWord[pos0] = filePtr.p->logMaxGciCompleted[mb];
22906         logPagePtr.p->logPageWord[pos1] = filePtr.p->logMaxGciStarted[mb];
22907         logPagePtr.p->logPageWord[pos2] = filePtr.p->logLastPrepRef[mb];
22908       }
22909       filePtr.i = filePtr.p->prevLogFile;
22910     }
22911     pos += (twmoNoLogDescriptors * ZFD_MBYTE_SIZE * clogFileSize);
22912     arrGuard(pos, ZPAGE_SIZE);
22913     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = pos;
22914     logPagePtr.p->logPageWord[pos] = ZNEXT_LOG_RECORD_TYPE;
22915   }
22916 
22917 /* ------------------------------------------------------- */
22918 /*       THIS IS A SPECIAL WRITE OF THE FIRST PAGE IN THE  */
22919 /*       LOG FILE. THIS HAS SPECIAL SIGNIFANCE TO FIND     */
22920 /*       THE END OF THE LOG AT SYSTEM RESTART.             */
22921 /* ------------------------------------------------------- */
22922   if (wmoType == ZINIT) {
22923     jam();
22924     writeSinglePage(signal, 0, ZPAGE_SIZE - 1, __LINE__, false);
22925     lfoPtr.p->lfoState = LogFileOperationRecord::INIT_FIRST_PAGE;
22926   } else {
22927     jam();
22928     writeSinglePage(signal, 0, ZPAGE_SIZE - 1, __LINE__, true);
22929     lfoPtr.p->lfoState = LogFileOperationRecord::FIRST_PAGE_WRITE_IN_LOGFILE;
22930   }//if
22931   logFilePtr.p->filePosition = 1;
22932   if (wmoType == ZNORMAL) {
22933     jam();
22934 /* -------------------------------------------------- */
22935 /*       ALLOCATE A NEW PAGE SINCE THE CURRENT IS     */
22936 /*       WRITTEN.                                     */
22937 /* -------------------------------------------------- */
22938     seizeLogpage(signal);
22939     initLogpage(signal);
22940     logFilePtr.p->currentLogpage = logPagePtr.i;
22941     logFilePtr.p->currentFilepage = logFilePtr.p->currentFilepage + 1;
22942   }//if
22943 }//Dblqh::writeFileHeaderOpen()
22944 
22945 /* -------------------------------------------------- */
22946 /*       THE NEW FILE POSITION WILL ALWAYS BE 1 SINCE */
22947 /*       WE JUST WROTE THE FIRST PAGE IN THE LOG FILE */
22948 /* -------------------------------------------------- */
22949 /* ------------------------------------------------------------------------- */
22950 /* -------               WRITE A MBYTE HEADER DURING INITIAL START   ------- */
22951 /*                                                                           */
22952 /*       SUBROUTINE SHORT NAME: WIM                                          */
22953 /* ------------------------------------------------------------------------- */
writeInitMbyte(Signal * signal)22954 void Dblqh::writeInitMbyte(Signal* signal)
22955 {
22956   if (m_use_om_init == 0)
22957   {
22958     jam();
22959     initLogpage(signal);
22960     writeSinglePage(signal, logFilePtr.p->currentMbyte * ZPAGES_IN_MBYTE,
22961                     ZPAGE_SIZE - 1, __LINE__, false);
22962     lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_INIT_MBYTE;
22963     checkReportStatus(signal);
22964   }
22965   else
22966   {
22967     jam();
22968     seizeLfo(signal);
22969     logFilePtr.p->currentMbyte = clogFileSize - 1;
22970     writeInitMbyteLab(signal);
22971   }
22972 }//Dblqh::writeInitMbyte()
22973 
22974 /* ------------------------------------------------------------------------- */
22975 /* -------               WRITE A SINGLE PAGE INTO A FILE             ------- */
22976 /*                                                                           */
22977 /*       INPUT:          TWSP_PAGE_NO    THE PAGE NUMBER WRITTEN             */
22978 /*       SUBROUTINE SHORT NAME:  WSP                                         */
22979 /* ------------------------------------------------------------------------- */
writeSinglePage(Signal * signal,Uint32 pageNo,Uint32 wordWritten,Uint32 place,bool sync)22980 void Dblqh::writeSinglePage(Signal* signal, Uint32 pageNo,
22981                             Uint32 wordWritten, Uint32 place,
22982                             bool sync)
22983 {
22984   seizeLfo(signal);
22985   initLfo(signal);
22986   lfoPtr.p->firstLfoPage = logPagePtr.i;
22987   logPagePtr.p->logPageWord[ZNEXT_PAGE] = RNIL;
22988 
22989   writeDbgInfoPageHeader(logPagePtr, place, pageNo, wordWritten);
22990   // Calculate checksum for page
22991   logPagePtr.p->logPageWord[ZPOS_CHECKSUM] = calcPageCheckSum(logPagePtr);
22992 
22993   lfoPtr.p->lfoPageNo = pageNo;
22994   lfoPtr.p->lfoWordWritten = wordWritten;
22995   lfoPtr.p->noPagesRw = 1;
22996 /* -------------------------------------------------- */
22997 /*       SET TIMER ON THIS LOG PART TO SIGNIFY THAT A */
22998 /*       LOG RECORD HAS BEEN SENT AT THIS TIME.       */
22999 /* -------------------------------------------------- */
23000   logPartPtr.p->logPartTimer = logPartPtr.p->logTimer;
23001   signal->theData[0] = logFilePtr.p->fileRef;
23002   signal->theData[1] = cownref;
23003   signal->theData[2] = lfoPtr.i;
23004   signal->theData[3] = sync ? ZLIST_OF_PAIRS_SYNCH : ZLIST_OF_PAIRS;
23005   signal->theData[4] = ZVAR_NO_LOG_PAGE_WORD;
23006   signal->theData[5] = 1;                     /* ONE PAGE WRITTEN */
23007   signal->theData[6] = logPagePtr.i;
23008   signal->theData[7] = pageNo;
23009   sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 8, JBA);
23010 
23011   if (logFilePtr.p->fileRef == RNIL)
23012   {
23013     signal->theData[0] = DumpStateOrd::LqhFailedHandlingGCP_SAVEREQ;
23014     execDUMP_STATE_ORD(signal);
23015   }
23016   ndbrequire(logFilePtr.p->fileRef != RNIL);
23017 
23018   logPartPtr.p->m_io_tracker.send_io(32768);
23019 
23020   if (DEBUG_REDO)
23021   {
23022     ndbout_c("writeSingle 1 page at part: %u file: %u page: %u (mb: %u)",
23023              logPartPtr.p->logPartNo,
23024              logFilePtr.p->fileNo,
23025              pageNo,
23026              pageNo >> ZTWOLOG_NO_PAGES_IN_MBYTE);
23027   }
23028 }//Dblqh::writeSinglePage()
23029 
23030 /* ##########################################################################
23031  *     SYSTEM RESTART PHASE ONE MODULE
23032  *     THIS MODULE IS A SUB-MODULE OF THE FILE SYSTEM HANDLING.
23033  *
23034  *     THIS MODULE CONTAINS THE CODE FOR THE FIRST PHASE OF THE SYSTEM RESTART.
23035  *     THE AIM OF THIS PHASE IS TO FIND THE END OF THE LOG AND TO FIND
23036  *     INFORMATION ABOUT WHERE GLOBAL CHECKPOINTS ARE COMPLETED AND STARTED
23037  *     IN THE LOG. THIS INFORMATION IS NEEDED TO START PHASE THREE OF
23038  *     THE SYSTEM RESTART.
23039  * ########################################################################## */
23040 /* --------------------------------------------------------------------------
23041  *     A SYSTEM RESTART OR NODE RESTART IS ONGOING. WE HAVE NOW OPENED FILE 0
23042  *     NOW WE NEED TO READ PAGE 0 TO FIND WHICH LOG FILE THAT WAS OPEN AT
23043  *     CRASH TIME.
23044  * -------------------------------------------------------------------------- */
openSrFrontpageLab(Signal * signal)23045 void Dblqh::openSrFrontpageLab(Signal* signal)
23046 {
23047   readSinglePage(signal, 0);
23048   lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_FRONTPAGE;
23049   return;
23050 }//Dblqh::openSrFrontpageLab()
23051 
23052 /* -------------------------------------------------------------------------
23053  * WE HAVE NOW READ PAGE 0 IN FILE 0. CHECK THE LAST OPEN FILE. ACTUALLY THE
23054  * LAST OPEN FILE COULD BE THE NEXT AFTER THAT. CHECK THAT FIRST. WHEN THE
23055  * LAST WAS FOUND WE CAN FIND ALL THE NEEDED INFORMATION WHERE TO START AND
23056  * STOP READING THE LOG.
23057  * -------------------------------------------------------------------------- */
readSrFrontpageLab(Signal * signal)23058 void Dblqh::readSrFrontpageLab(Signal* signal)
23059 {
23060   Uint32 num_parts_used;
23061   {
23062     jam();
23063     num_parts_used = logPagePtr.p->logPageWord[ZPOS_NO_LOG_PARTS];
23064   }
23065   /* Verify that number of log parts >= number of LQH workers */
23066   if (globalData.ndbMtLqhWorkers > num_parts_used) {
23067     char buf[255];
23068     BaseString::snprintf(buf, sizeof(buf),
23069       "Trying to start %d LQH workers with only %d log parts, try initial"
23070       " node restart to be able to use more LQH workers.",
23071       globalData.ndbMtLqhWorkers, num_parts_used);
23072     progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
23073   }
23074   if (num_parts_used != globalData.ndbLogParts)
23075   {
23076     char buf[255];
23077     BaseString::snprintf(buf, sizeof(buf),
23078       "Can only change NoOfLogParts through initial node restart, old"
23079       " value of NoOfLogParts = %d, tried using %d",
23080       num_parts_used, globalData.ndbLogParts);
23081     progError(__LINE__, NDBD_EXIT_INVALID_CONFIG, buf);
23082   }
23083 
23084   Uint32 fileNo = logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_FILE_NO];
23085   /* ------------------------------------------------------------------------
23086    *    CLOSE FILE 0 SO THAT WE HAVE CLOSED ALL FILES WHEN STARTING TO READ
23087    *    THE FRAGMENT LOG. ALSO RELEASE PAGE ZERO.
23088    * ------------------------------------------------------------------------ */
23089   releaseLogpage(signal);
23090   logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_SR_FRONTPAGE;
23091   closeFile(signal, logFilePtr, __LINE__);
23092   /* Lookup index of last file */
23093   LogFileRecordPtr locLogFilePtr;
23094   findLogfile(signal, fileNo, logPartPtr, &locLogFilePtr);
23095 
23096   /* Store in logPart record for use once file 0 is closed */
23097   logPartPtr.p->srLastFileIndex = locLogFilePtr.i;
23098   return;
23099 }//Dblqh::readSrFrontpageLab()
23100 
openSrLastFileLab(Signal * signal)23101 void Dblqh::openSrLastFileLab(Signal* signal)
23102 {
23103   readSinglePage(signal, 0);
23104   lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_LAST_FILE;
23105   return;
23106 }//Dblqh::openSrLastFileLab()
23107 
readSrLastFileLab(Signal * signal)23108 void Dblqh::readSrLastFileLab(Signal* signal)
23109 {
23110   logPartPtr.p->logLap = logPagePtr.p->logPageWord[ZPOS_LOG_LAP];
23111   if (DEBUG_REDO)
23112   {
23113     ndbout_c("readSrLastFileLab part: %u logExecState: %u logPartState: %u logLap: %u",
23114              logPartPtr.p->logPartNo,
23115              logPartPtr.p->logExecState,
23116              logPartPtr.p->logPartState,
23117              logPartPtr.p->logLap);
23118   }
23119   if (logPartPtr.p->noLogFiles > cmaxValidLogFilesInPageZero) {
23120     jam();
23121     initGciInLogFileRec(signal, cmaxValidLogFilesInPageZero);
23122   } else {
23123     jam();
23124     initGciInLogFileRec(signal, logPartPtr.p->noLogFiles);
23125   }//if
23126   /* ------------------------------------------------------------------------
23127    *    NOW WE HAVE FOUND THE LAST LOG FILE. WE ALSO NEED TO FIND THE LAST
23128    *    MBYTE THAT WAS LAST WRITTEN BEFORE THE SYSTEM CRASH.
23129    * ------------------------------------------------------------------------ */
23130   logPartPtr.p->lastLogfile = logFilePtr.i;
23131   /**
23132    * It is safe to read page 0 of the first MByte since we always ensure that
23133    * this page is up to date before we update current file number in page 0
23134    * of file 0. Given that we already have page 0 read, we can now call
23135    * readSrLastMbyteLab immediately, no need to reread page 0.
23136    */
23137   logFilePtr.p->currentMbyte = 0;
23138   readSrLastMbyteLab(signal);
23139   return;
23140 }//Dblqh::readSrLastFileLab()
23141 
readSrLastMbyteLab(Signal * signal)23142 void Dblqh::readSrLastMbyteLab(Signal* signal)
23143 {
23144   if (logPartPtr.p->lastMbyte == ZNIL)
23145   {
23146     if (logPagePtr.p->logPageWord[ZPOS_LOG_LAP] < logPartPtr.p->logLap) {
23147       jam();
23148       logPartPtr.p->lastMbyte = logFilePtr.p->currentMbyte - 1;
23149       if (DEBUG_REDO)
23150       {
23151         ndbout_c("readSrLastMbyteLab part: %u file: %u lastMbyte: %u",
23152                  logPartPtr.p->logPartNo,
23153                  logFilePtr.p->fileNo,
23154                  logPartPtr.p->lastMbyte);
23155       }
23156     }//if
23157   }//if
23158   arrGuard(logFilePtr.p->currentMbyte, clogFileSize);
23159   logFilePtr.p->logMaxGciCompleted[logFilePtr.p->currentMbyte] =
23160     logPagePtr.p->logPageWord[ZPOS_MAX_GCI_COMPLETED];
23161   logFilePtr.p->logMaxGciStarted[logFilePtr.p->currentMbyte] =
23162     logPagePtr.p->logPageWord[ZPOS_MAX_GCI_STARTED];
23163   logFilePtr.p->logLastPrepRef[logFilePtr.p->currentMbyte] =
23164     logPagePtr.p->logPageWord[ZLAST_LOG_PREP_REF];
23165   releaseLogpage(signal);
23166   if (logFilePtr.p->currentMbyte < (clogFileSize - 1)) {
23167     jam();
23168     logFilePtr.p->currentMbyte++;
23169     readSinglePage(signal, ZPAGES_IN_MBYTE * logFilePtr.p->currentMbyte);
23170     lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_LAST_MBYTE;
23171     return;
23172   } else {
23173     jam();
23174     /* ----------------------------------------------------------------------
23175      *    THE LOG WAS IN THE LAST MBYTE WHEN THE CRASH OCCURRED SINCE ALL
23176      *    LOG LAPS ARE EQUAL TO THE CURRENT LOG LAP.
23177      * ---------------------------------------------------------------------- */
23178     if (logPartPtr.p->lastMbyte == ZNIL) {
23179       jam();
23180       logPartPtr.p->lastMbyte = clogFileSize - 1;
23181     }//if
23182   }//if
23183   if (ERROR_INSERTED(5092))
23184   {
23185     jam();
23186     suspendFile(signal, logFilePtr, 3000); // Slow close
23187   }
23188 
23189   logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_SR;
23190   closeFile(signal, logFilePtr, __LINE__);
23191 
23192   /* Head file is initialised by reading per-MB headers rather than per-file
23193    * headers.  Therefore, when stepping back through the redo files to get
23194    * the previous file's metadata, we must be careful not to read the
23195    * per-file header info over the just-read per-MB headers, invalidating
23196    * the head metainfo.
23197    */
23198   Uint32 nonHeadFileCount = logPartPtr.p->noLogFiles - 1;
23199 
23200   if (logPartPtr.p->noLogFiles > cmaxValidLogFilesInPageZero) {
23201     /* Step back from head to get file:mb metadata from a
23202      * previous file's page zero
23203      */
23204     Uint32 fileNo;
23205     if (logFilePtr.p->fileNo >= cmaxValidLogFilesInPageZero) {
23206       jam();
23207       fileNo = logFilePtr.p->fileNo - cmaxValidLogFilesInPageZero;
23208     } else {
23209       /* Wrap at 0:0 */
23210       jam();
23211       fileNo =
23212 	(logPartPtr.p->noLogFiles + logFilePtr.p->fileNo) -
23213 	cmaxValidLogFilesInPageZero;
23214     }//if
23215 
23216     jam();
23217     logPartPtr.p->srRemainingFiles =
23218       nonHeadFileCount - cmaxValidLogFilesInPageZero;
23219 
23220     /* Check we're making progress */
23221     ndbrequire(fileNo != logFilePtr.p->fileNo);
23222     LogFileRecordPtr locLogFilePtr;
23223     findLogfile(signal, fileNo, logPartPtr, &locLogFilePtr);
23224     ndbrequire(locLogFilePtr.p->logFileStatus == LogFileRecord::CLOSED);
23225     locLogFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_NEXT_FILE;
23226     openFileRw(signal, locLogFilePtr, false); /* No write buffering */
23227     return;
23228   }//if
23229   /* ------------------------------------------------------------------------
23230    *   THERE WERE NO NEED TO READ ANY MORE PAGE ZERO IN OTHER FILES.
23231    *   WE NOW HAVE ALL THE NEEDED INFORMATION ABOUT THE GCI'S THAT WE NEED.
23232    *   NOW JUST WAIT FOR CLOSE OPERATIONS TO COMPLETE.
23233    * ------------------------------------------------------------------------ */
23234   return;
23235 }//Dblqh::readSrLastMbyteLab()
23236 
openSrNextFileLab(Signal * signal)23237 void Dblqh::openSrNextFileLab(Signal* signal)
23238 {
23239   readSinglePage(signal, 0);
23240   lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_NEXT_FILE;
23241   return;
23242 }//Dblqh::openSrNextFileLab()
23243 
readSrNextFileLab(Signal * signal)23244 void Dblqh::readSrNextFileLab(Signal* signal)
23245 {
23246   if (logPartPtr.p->srRemainingFiles > cmaxValidLogFilesInPageZero) {
23247     jam();
23248     initGciInLogFileRec(signal, cmaxValidLogFilesInPageZero);
23249   } else {
23250     jam();
23251     initGciInLogFileRec(signal, logPartPtr.p->srRemainingFiles);
23252   }//if
23253   releaseLogpage(signal);
23254   if (ERROR_INSERTED(5092))
23255   {
23256     jam();
23257     suspendFile(signal, logFilePtr, 3000); // Slow close
23258   }
23259   logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_SR;
23260   closeFile(signal, logFilePtr, __LINE__);
23261   if (logPartPtr.p->srRemainingFiles > cmaxValidLogFilesInPageZero) {
23262     /* Step back from head to get file:mb metadata from a
23263      * previous file's page zero
23264      */
23265     Uint32 fileNo;
23266     if (logFilePtr.p->fileNo >= cmaxValidLogFilesInPageZero) {
23267       jam();
23268       fileNo = logFilePtr.p->fileNo - cmaxValidLogFilesInPageZero;
23269     } else {
23270       /* Wrap at 0:0 */
23271       jam();
23272       fileNo =
23273 	(logPartPtr.p->noLogFiles + logFilePtr.p->fileNo) -
23274 	cmaxValidLogFilesInPageZero;
23275     }//if
23276 
23277     jam();
23278     logPartPtr.p->srRemainingFiles =
23279       logPartPtr.p->srRemainingFiles - cmaxValidLogFilesInPageZero;
23280 
23281     /* Check we're making progress */
23282     ndbrequire(fileNo != logFilePtr.p->fileNo);
23283 
23284     /**
23285      * Note that we are opening another file without waiting for
23286      * the previous FSCLOSECONF.
23287      * This can result in > 4 concurrently open files.
23288      */
23289     LogFileRecordPtr locLogFilePtr;
23290     findLogfile(signal, fileNo, logPartPtr, &locLogFilePtr);
23291     ndbrequire(locLogFilePtr.p->logFileStatus == LogFileRecord::CLOSED);
23292     locLogFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_NEXT_FILE;
23293     openFileRw(signal, locLogFilePtr, false); /* No write buffering */
23294   }//if
23295   /* ------------------------------------------------------------------------
23296    *   THERE WERE NO NEED TO READ ANY MORE PAGE ZERO IN OTHER FILES.
23297    *   WE NOW HAVE ALL THE NEEDED INFORMATION ABOUT THE GCI'S THAT WE NEED.
23298    *   NOW JUST WAIT FOR CLOSE OPERATIONS TO COMPLETE.
23299    * ------------------------------------------------------------------------ */
23300   return;
23301 }//Dblqh::readSrNextFileLab()
23302 
closingSrFrontPage(Signal * signal)23303 void Dblqh::closingSrFrontPage(Signal* signal)
23304 {
23305   jam();
23306   /* Front page (file 0) has closed, now it's safe to continue
23307    * to read any page (including file 0) as part of restoring
23308    * redo metadata
23309    */
23310   logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
23311   logPartPtr.i = logFilePtr.p->logPartRec;
23312   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
23313   logFilePtr.i = logPartPtr.p->firstLogfile;
23314 
23315   /* Pre-restart head file index was stored in logPartPtr.p->srLastFileIndex
23316    * prior to closing this file, now let's use it...
23317    */
23318   ndbrequire(logPartPtr.p->srLastFileIndex != RNIL);
23319 
23320   LogFileRecordPtr oldHead;
23321   oldHead.i = logPartPtr.p->srLastFileIndex;
23322   ptrCheckGuard(oldHead, clogFileFileSize, logFileRecord);
23323 
23324   /* Reset srLastFileIndex */
23325   logPartPtr.p->srLastFileIndex = RNIL;
23326 
23327   /* And now open the head file to begin redo meta reload */
23328   oldHead.p->logFileStatus = LogFileRecord::OPEN_SR_LAST_FILE;
23329   openFileRw(signal, oldHead, false); /* No write buffering */
23330   return;
23331 }
23332 
closingSrLab(Signal * signal)23333 void Dblqh::closingSrLab(Signal* signal)
23334 {
23335   logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
23336   logPartPtr.i = logFilePtr.p->logPartRec;
23337   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
23338   logFilePtr.i = logPartPtr.p->firstLogfile;
23339   do {
23340     jam();
23341     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
23342     if (logFilePtr.p->logFileStatus != LogFileRecord::CLOSED) {
23343       jam();
23344       /* --------------------------------------------------------------------
23345        *  EXIT AND WAIT FOR REMAINING LOG FILES TO COMPLETE THEIR WORK.
23346        * -------------------------------------------------------------------- */
23347       return;
23348     }//if
23349     logFilePtr.i = logFilePtr.p->nextLogFile;
23350   } while (logFilePtr.i != logPartPtr.p->firstLogfile);
23351   /* ------------------------------------------------------------------------
23352    *  ALL FILES IN THIS PART HAVE BEEN CLOSED. THIS INDICATES THAT THE FIRST
23353    *  PHASE OF THE SYSTEM RESTART HAVE BEEN CONCLUDED FOR THIS LOG PART.
23354    *  CHECK IF ALL OTHER LOG PARTS ARE ALSO COMPLETED.
23355    * ------------------------------------------------------------------------ */
23356   logPartPtr.p->logPartState = LogPartRecord::SR_FIRST_PHASE_COMPLETED;
23357   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
23358     jam();
23359     ptrAss(logPartPtr, logPartRecord);
23360     if (logPartPtr.p->logPartState != LogPartRecord::SR_FIRST_PHASE_COMPLETED) {
23361       jam();
23362       /* --------------------------------------------------------------------
23363        * EXIT AND WAIT FOR THE REST OF THE LOG PARTS TO COMPLETE.
23364        * -------------------------------------------------------------------- */
23365       return;
23366     }//if
23367   }//for
23368   /* ------------------------------------------------------------------------
23369    *       THE FIRST PHASE HAVE BEEN COMPLETED.
23370    * ------------------------------------------------------------------------ */
23371   g_eventLogger->info("LDM(%u):"
23372                       "Ready to start execute REDO log phase,"
23373                       " prepare REDO log phase completed",
23374                       instance());
23375 
23376   signal->theData[0] = ZSR_PHASE3_START;
23377   signal->theData[1] = ZSR_PHASE1_COMPLETED;
23378   sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
23379   return;
23380 }//Dblqh::closingSrLab()
23381 
23382 /* ##########################################################################
23383  * #######                  SYSTEM RESTART PHASE TWO MODULE           #######
23384  *
23385  *  THIS MODULE HANDLES THE SYSTEM RESTART WHERE LQH CONTROLS TUP AND ACC TO
23386  *  ENSURE THAT THEY HAVE KNOWLEDGE OF ALL FRAGMENTS AND HAVE DONE THE NEEDED
23387  *  READING OF DATA FROM FILE AND EXECUTION OF LOCAL LOGS. THIS PROCESS
23388  *  EXECUTES CONCURRENTLY WITH PHASE ONE OF THE SYSTEM RESTART. THIS PHASE
23389  *  FINDS THE INFORMATION ABOUT THE FRAGMENT LOG NEEDED TO EXECUTE THE FRAGMENT
23390  *  LOG.
23391  *  WHEN TUP AND ACC HAVE PREPARED ALL FRAGMENTS THEN LQH ORDERS THOSE LQH'S
23392  *  THAT ARE RESPONSIBLE TO EXECUTE THE FRAGMENT LOGS TO DO SO. IT IS POSSIBLE
23393  *  THAT ANOTHER NODE EXECUTES THE LOG FOR A FRAGMENT RESIDING AT THIS NODE.
23394  * ########################################################################## */
23395 /* ***************>> */
23396 /*  START_FRAGREQ  > */
23397 /* ***************>> */
/**
 * Signal handler for START_FRAGREQ.
 *
 * Looks up the table and fragment named in the signal and records the
 * restore parameters in the fragment record: the requester (userRef /
 * userPtr), the LCP number and id, and the REDO log ranges to execute.
 * The fragment is then queued on c_lcp_waiting_fragments.
 *
 * If getFragmentrec() fails, START_FRAGREF is returned to the sender via
 * startFragRefLab(). If the signal carries a non-zero nodeRestorableGci
 * and no fragment is currently on c_lcp_restoring_fragments, restoring
 * of the first waiting fragment is started immediately through
 * send_restore_lcp().
 *
 * @param signal  The received signal; its data is read as a StartFragReq.
 */
void Dblqh::execSTART_FRAGREQ(Signal* signal)
{
  /**
   * We don't need to worry about NOLOGGING tables and temporary tables
   * here. These fragments are added at restart, but not started since they
   * by definition are restored as empty fragments.
   */
  const StartFragReq * const startFragReq = (StartFragReq *)&signal->theData[0];
  jamEntry();

  c_fragmentsStarted++;

  tabptr.i = startFragReq->tableId;
  Uint32 fragId = startFragReq->fragId;

  ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
  if (!getFragmentrec(signal, fragId)) {
    // Fragment lookup failed: answer with START_FRAGREF and give up.
    startFragRefLab(signal);
    return;
  }//if
  tabptr.p->tableStatus = Tablerec::TABLE_DEFINED;
  DEB_SCHEMA_VERSION(("(%u)tab: %u tableStatus = TABLE_DEFINED(3)",
                      instance(),
                      tabptr.i));
  c_pgman->set_table_ready_for_prep_lcp_writes(tabptr.i, true);

  // Copy the request fields into locals before interpreting them.
  Uint32 lcpNo = startFragReq->lcpNo;
  Uint32 noOfLogNodes = startFragReq->noOfLogNodes;
  Uint32 lcpId = startFragReq->lcpId;
  Uint32 requestInfo = startFragReq->requestInfo;
  Uint32 nodeRestorableGci = startFragReq->nodeRestorableGci;
  if (signal->getLength() < StartFragReq::SignalOldLength)
  {
    // Signal too short to carry requestInfo: default to restoring an LCP.
    jam();
    requestInfo = StartFragReq::SFR_RESTORE_LCP;
  }
  if (signal->getLength() < StartFragReq::SignalLength)
  {
    // Signal too short to carry nodeRestorableGci: treat it as unknown (0).
    jam();
    nodeRestorableGci = 0;
  }

  bool doprint = false;
#ifdef ERROR_INSERT
  /**
   * Always printSTART_FRAG_REQ (for debugging) if ERROR_INSERT is set
   */
  doprint = true;
#endif
#ifdef DEBUG_LCP
  doprint = true;
#endif
  // Requests with more than one log node are always printed.
  if (doprint || noOfLogNodes > 1)
  {
    printSTART_FRAG_REQ(stdout, signal->getDataPtr(), signal->getLength(),
                        number());
  }

  ndbrequire(noOfLogNodes <= MAX_LOG_EXEC);
  fragptr.p->fragStatus = Fragrecord::CRASH_RECOVERING;
  fragptr.p->srBlockref = startFragReq->userRef;
  fragptr.p->srUserptr = startFragReq->userPtr;
  fragptr.p->srChkpnr = lcpNo;
  // Both of the first two branches store the LCP id for a valid lcpNo;
  // any other lcpNo must be ZNIL (no LCP known for this fragment).
  if (lcpNo == (MAX_LCP_STORED - 1)) {
    jam();
    fragptr.p->lcpId[lcpNo] = lcpId;
  } else if (lcpNo < (MAX_LCP_STORED - 1)) {
    jam();
    fragptr.p->lcpId[lcpNo] = lcpId;
  } else {
    ndbrequire(lcpNo == ZNIL);
    jam();
  }//if
  fragptr.p->srNoLognodes = noOfLogNodes;
  fragptr.p->logFlag = Fragrecord::STATE_FALSE;
  fragptr.p->srStatus = Fragrecord::SS_IDLE;

  if (requestInfo == StartFragReq::SFR_COPY_FRAG)
  {
    jam();
    ndbrequire(lcpNo == ZNIL);
    fragptr.p->srLqhLognode[0] = startFragReq->lqhLogNode[0]; // src

    // Magic no, meaning to COPY_FRAGREQ instead of read from disk
    fragptr.p->srChkpnr = Z8NIL;
    c_fragmentsStartedWithCopy++;
    ndbrequire(noOfLogNodes == 0);
  }

  if (noOfLogNodes > 0)
  {
    jam();
    // Record every REDO log range (start GCI, last GCI, log node).
    for (Uint32 i = 0; i < noOfLogNodes; i++) {
      jam();
      fragptr.p->srStartGci[i] = startFragReq->startGci[i];
      fragptr.p->srLastGci[i] = startFragReq->lastGci[i];
      fragptr.p->srLqhLognode[i] = startFragReq->lqhLogNode[i];
    }//for
    // The last GCI of the final range is the newest GCI for the fragment.
    fragptr.p->newestGci = startFragReq->lastGci[noOfLogNodes - 1];
    fragptr.p->m_completed_gci = startFragReq->lastGci[noOfLogNodes - 1];
  }
  else
  {
    jam();
    /**
     * This is a really weird piece of code
     *   it's probably incorrect, but seems to mask problems...
     *
     * This code can only be executed by node restarts. In this
     * case having no log nodes simply means that we restore
     * entirely from the live node. This is indicated by
     * nodeRestorableGci == 0.
     * In reality there should be some REDO log to execute, but
     * this should only happen immediately after creating table
     * and no LCP executed before crash, so should be ok to skip
     * the REDO log and instead restore from live node for this
     * specific case. To use the REDO log would require ensuring
     * that not multiple failures have occurred, so this makes
     * code a bit simpler although a bit less efficient in this
     * specific case.
     */
    if (cnewestGci > fragptr.p->newestGci)
    {
      jam();
      fragptr.p->newestGci = cnewestGci;
    }
    fragptr.p->m_completed_gci = 0;
  }//if

  /**
   * To slightly speed up the restart newer versions send the newest
   * GCI that the node can restore on its own. This is the last GCI
   * where the node completed the GCI protocol. This is an important
   * number as we cannot use any LCPs that have written any GCI which
   * is newer than this number.
   *
   * In upgrade cases we hold off with starting to send RESTORE_LCP_REQ
   * until we have received START_RECREQ where this also arrives. In
   * newer versions we added this already to the START_FRAGREQ signal.
   */
  if (nodeRestorableGci != 0)
  {
    jam();
    if (crestartNewestGci == 0 ||
        crestartNewestGci == ZUNDEFINED_GCI_LIMIT)
    {
      // First time the restorable GCI becomes known: remember it.
      jam();
      crestartNewestGci = nodeRestorableGci;
    }
    else
    {
      // Every later request must report the same restorable GCI.
      ndbrequire(crestartNewestGci == nodeRestorableGci);
    }
  }

  c_lcp_waiting_fragments.addLast(fragptr);
  if (requestInfo == StartFragReq::SFR_COPY_FRAG)
  {
    jam();
  }
  else if (lcpNo == ZNIL)
  {
    /**
     *  THERE WAS NO LOCAL CHECKPOINT AVAILABLE FOR THIS FRAGMENT. WE DO
     *  NOT NEED TO READ IN THE LOCAL FRAGMENT.
     *
     * Given that we might have completed the local checkpoint before DIH
     * got to know about it in LCP format introduced in 7.5 we will still
     * try to restore the LCP locally. If no LCP control files then we will
     * not attempt to execute the LCP however, rather we will delete the
     * LCP files and ensure that a control file exists there but no data
     * files.
     *
     * fragPtr.p->srChkpnr == ZNIL indicates to RESTORE block that DIH didn't
     * know about any LCP for this fragment.
     */
    jam();
  }
  else
  {
    jam();

    // Under error insert 5055, check that all fragments agree on the LCP id.
    if (ERROR_INSERTED(5055))
    {
      ndbrequire(c_restart_lcpId == 0 ||
                 lcpId == 0 ||
                 c_restart_lcpId == lcpId);
    }
  }
  // Start restoring right away when the restorable GCI is known and no
  // other fragment restore is in progress.
  if (nodeRestorableGci != 0 && c_lcp_restoring_fragments.isEmpty())
  {
    jam();
    send_restore_lcp(signal);
  }
}//Dblqh::execSTART_FRAGREQ()
23593 
23594 void
send_restore_lcp(Signal * signal)23595 Dblqh::send_restore_lcp(Signal * signal)
23596 {
23597   c_lcp_waiting_fragments.first(fragptr);
23598   c_lcp_waiting_fragments.remove(fragptr);
23599   c_lcp_restoring_fragments.addLast(fragptr);
23600 
23601   if (fragptr.p->srChkpnr != Z8NIL)
23602   {
23603     /**
23604      * We're sending the DIH view to the RESTORE block, this is necessary
23605      * in upgrade situations. In the case when the LCP was created by 7.5
23606      * and later the RESTORE block will itself discover the LCP id used to
23607      * recover, it will use the GCI to restore to get this information
23608      * and the information stored in the LCP control files.
23609      *
23610      * The RESTORE block will return the LCP id used to restore in the
23611      * CONF signal. This makes it possible for DBTUP to use the correct
23612      * LCP id to restore the disk data. (This includes the local LCP id).
23613      */
23614     jam();
23615     RestoreLcpReq* req= (RestoreLcpReq*)signal->getDataPtrSend();
23616     req->senderData = fragptr.i;
23617     req->senderRef = reference();
23618     req->tableId = fragptr.p->tabRef;
23619     req->fragmentId = fragptr.p->fragId;
23620     req->lcpNo = fragptr.p->srChkpnr;
23621     if (fragptr.p->srChkpnr == ZNIL)
23622     {
23623       jam();
23624       req->lcpId = 0;
23625       req->maxGciCompleted = 0;
23626     }
23627     else
23628     {
23629       jam();
23630       req->lcpId = fragptr.p->lcpId[fragptr.p->srChkpnr];
23631       req->maxGciCompleted = fragptr.p->srStartGci[0] - 1;
23632     }
23633     req->restoreGcpId = crestartNewestGci;
23634     if (c_local_sysfile.m_max_gci_restorable > ZUNDEFINED_GCI_LIMIT)
23635     {
23636       jam();
23637       ndbrequire(c_local_sysfile.m_max_gci_restorable >=
23638                  crestartNewestGci);
23639       req->restoreGcpId = c_local_sysfile.m_max_gci_restorable;
23640     }
23641     /**
23642      * DIH could potentially send a createGci that is newer than
23643      * what is restorable. This could happen when the table was created
23644      * very close to the crash.
23645      * We will still keep the createGci as is to discover if it is the
23646      * same table since all LCP control files are tagged with the
23647      * createGci.
23648      */
23649     req->createGci = fragptr.p->createGci;
23650     BlockReference restoreRef = calcInstanceBlockRef(RESTORE);
23651     sendSignal(restoreRef, GSN_RESTORE_LCP_REQ, signal,
23652                RestoreLcpReq::SignalLength, JBB);
23653   }
23654   else
23655   {
23656     jam();
23657 
23658     tabptr.i = fragptr.p->tabRef;
23659     ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
23660 
23661     fragptr.p->fragStatus = Fragrecord::ACTIVE_CREATION;
23662     CopyFragReq * req = CAST_PTR(CopyFragReq, signal->getDataPtrSend());
23663     req->senderData = fragptr.i;
23664     req->senderRef = reference();
23665     req->tableId = fragptr.p->tabRef;
23666     req->fragId = fragptr.p->fragId;
23667     req->nodeId = getOwnNodeId();
23668     req->schemaVersion = tabptr.p->schemaVersion;
23669     req->distributionKey = 0;
23670     req->gci = fragptr.p->lcpId[0];
23671     req->nodeCount = 0;
23672     req->nodeList[1] = CopyFragReq::CFR_NON_TRANSACTIONAL;
23673     Uint32 instanceKey = fragptr.p->lqhInstanceKey;
23674     BlockReference ref = numberToRef(DBLQH, instanceKey,
23675                                      fragptr.p->srLqhLognode[0]);
23676 
23677     sendSignal(ref, GSN_COPY_FRAGREQ, signal,
23678                CopyFragReq::SignalLength, JBB);
23679   }
23680 }
23681 
23682 void
execCOPY_FRAGREF(Signal * signal)23683 Dblqh::execCOPY_FRAGREF(Signal* signal)
23684 {
23685   jamEntry();
23686 
23687   const CopyFragRef * ref = CAST_CONSTPTR(CopyFragRef, signal->getDataPtr());
23688   Uint32 errorCode = ref->errorCode;
23689 
23690   SystemError * sysErr = (SystemError*)&signal->theData[0];
23691   sysErr->errorCode = SystemError::CopyFragRefError;
23692   sysErr->errorRef = reference();
23693   sysErr->data[0] = errorCode;
23694   sysErr->data[1] = 0;
23695   sendSignal(NDBCNTR_REF, GSN_SYSTEM_ERROR, signal,
23696              SystemError::SignalLength, JBB);
23697 }
23698 
23699 void
execCOPY_FRAGCONF(Signal * signal)23700 Dblqh::execCOPY_FRAGCONF(Signal* signal)
23701 {
23702   jamEntry();
23703   {
23704     const CopyFragConf* conf = CAST_CONSTPTR(CopyFragConf,
23705                                              signal->getDataPtr());
23706     c_fragment_pool.getPtr(fragptr, conf->senderData);
23707     fragptr.p->fragStatus = Fragrecord::CRASH_RECOVERING;
23708 
23709     Uint32 rows_lo = conf->rows_lo;
23710     Uint32 bytes_lo = conf->bytes_lo;
23711     signal->theData[0] = NDB_LE_NR_CopyFragDone;
23712     signal->theData[1] = getOwnNodeId();
23713     signal->theData[2] = fragptr.p->tabRef;
23714     signal->theData[3] = fragptr.p->fragId;
23715     signal->theData[4] = rows_lo;
23716     signal->theData[5] = 0;
23717     signal->theData[6] = bytes_lo;
23718     signal->theData[7] = 0;
23719     sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 8, JBB);
23720     g_eventLogger->debug("(%u)tab(%u,%u), COPY_FRAGCONF: %u rows inserted",
23721                          instance(),
23722                          fragptr.p->tabRef,
23723                          fragptr.p->fragId,
23724                          rows_lo);
23725   }
23726 
23727   {
23728     RestoreLcpConf* conf= (RestoreLcpConf*)signal->getDataPtr();
23729     conf->senderData = fragptr.i;
23730     conf->restoredLcpId = RNIL;
23731     conf->restoredLocalLcpId = RNIL;
23732     conf->afterRestore = 0;
23733     execRESTORE_LCP_CONF(signal);
23734   }
23735 }
23736 
startFragRefLab(Signal * signal)23737 void Dblqh::startFragRefLab(Signal* signal)
23738 {
23739   const StartFragReq * const startFragReq = (StartFragReq *)&signal->theData[0];
23740   BlockReference userRef = startFragReq->userRef;
23741   Uint32 userPtr = startFragReq->userPtr;
23742   signal->theData[0] = userPtr;
23743   signal->theData[1] = terrorCode;
23744   signal->theData[2] = cownNodeid;
23745   sendSignal(userRef, GSN_START_FRAGREF, signal, 3, JBB);
23746   return;
23747 }//Dblqh::startFragRefLab()
23748 
execRESTORE_LCP_REF(Signal * signal)23749 void Dblqh::execRESTORE_LCP_REF(Signal* signal)
23750 {
23751   jamEntry();
23752   ndbabort();
23753   return;
23754 }
23755 
move_start_gci_forward(Signal * signal,Uint32 new_start_gci)23756 void Dblqh::move_start_gci_forward(Signal *signal, Uint32 new_start_gci)
23757 {
23758   Uint32 remove_range = 0;
23759   for (Uint32 i = 0; i < fragptr.p->srNoLognodes; i++)
23760   {
23761     jam();
23762     if (fragptr.p->srStartGci[i] == new_start_gci)
23763     {
23764       jam();
23765       /**
23766        * The RESTORE block didn't move forward the starting point of
23767        * the REDO log execution.
23768        */
23769       break;
23770     }
23771     ndbrequire(new_start_gci > fragptr.p->srStartGci[i]);
23772     if (fragptr.p->srLastGci[i] >= new_start_gci)
23773     {
23774       jam();
23775       /**
23776        * We move it forward within this range, no need to remove any
23777        * range.
23778        */
23779       fragptr.p->srStartGci[i] = new_start_gci;
23780       break;
23781     }
23782     /**
23783      * The entire first range need to be removed.
23784      */
23785     ndbrequire((i + 1) <= fragptr.p->srNoLognodes);
23786     remove_range++;
23787   }
23788   if (remove_range == 0)
23789   {
23790     jam();
23791     return;
23792   }
23793   /**
23794    * Remove ranges by moving them one step at a time closer to index 0.
23795    */
23796   for (Uint32 i = 0; i < remove_range; i++)
23797   {
23798     Uint32 index = fragptr.p->srNoLognodes - 1;
23799     for (Uint32 j = 0; j < index; j++)
23800     {
23801       fragptr.p->srStartGci[j] = fragptr.p->srStartGci[j+1];
23802       fragptr.p->srLastGci[j] = fragptr.p->srLastGci[j+1];
23803       fragptr.p->srLqhLognode[j] = fragptr.p->srLqhLognode[j+1];
23804     }
23805     fragptr.p->srNoLognodes--;
23806   }
23807 }
23808 
/**
 * Signal handler for RESTORE_LCP_CONF.
 *
 * Called when restoring of one fragment has completed. It is also called
 * directly from execCOPY_FRAGCONF() with afterRestore == 0.
 *
 * The handler
 *  - moves the fragment from c_lcp_restoring_fragments to
 *    c_lcp_complete_fragments,
 *  - when afterRestore != 0, tells DBTUP which LCP id was restored,
 *    updates the minimum/maximum restored (lcpId, localLcpId) pairs and
 *    moves the REDO start GCI forward,
 *  - sends START_FRAGCONF when no REDO log ranges remain,
 *  - starts restoring the next waiting fragment, if any, and
 *  - when all fragments are restored and START_RECREQ has arrived,
 *    starts the UNDO log execution phase.
 *
 * @param signal  The received signal; its data is read as a
 *                RestoreLcpConf and it is reused as send buffer.
 */
void Dblqh::execRESTORE_LCP_CONF(Signal* signal)
{
  jamEntry();
  RestoreLcpConf* conf= (RestoreLcpConf*)signal->getDataPtr();
  fragptr.i = conf->senderData;
  Uint32 restoredLcpId = conf->restoredLcpId;
  Uint32 restoredLocalLcpId = conf->restoredLocalLcpId;
  Uint32 maxGciCompleted = conf->maxGciCompleted;
  Uint32 afterRestore = conf->afterRestore;
  c_fragment_pool.getPtr(fragptr);

  {
    /**
     * Calculate average row size after restore.
     */
    // NOTE(review): the values returned below are local to this scope
    // and are not used afterwards in this function.
    Uint32 max_page_cnt;
    Uint64 row_count;
    Uint64 prev_row_count;
    Uint64 row_change_count;
    Uint64 memory_used_in_bytes;
    c_tup->get_lcp_frag_stats(fragptr.p->tupFragptr,
                              0, /* Ignored when reset flag is false */
                              max_page_cnt,
                              row_count,
                              prev_row_count,
                              row_change_count,
                              memory_used_in_bytes,
                              false);
  }
  // The fragment is no longer being restored; file it as complete.
  c_lcp_restoring_fragments.remove(fragptr);
  c_lcp_complete_fragments.addLast(fragptr);

  if (afterRestore != 0)
  {
    jam();
    if (restoredLcpId == 0 &&
        restoredLocalLcpId == 0 &&
        maxGciCompleted == 0)
    {
      jam();
      /**
       * The RESTORE block could not find any LCP for this fragment
       * to restore. So in order to ensure that we don't attempt
       * to execute any UNDO log record we act as if we had hit
       * a CREATE TABLE in the UNDO log and set the UNDO log
       * execution for this fragment to completed.
       *
       * There is no need to move start GCI forward for this fragment
       * since we have not found any newer LCP for sure.
       */
      c_tup->disk_restart_lcp_id(fragptr.p->tabRef,
                                 fragptr.p->fragId,
                                 RNIL,
                                 0);
    }
    else
    {
      jam();
      /**
       * Keep track of minimal lcp-id (including local lcp id)
       * also keep track of maximum tuple of (lcpId, localLcpId).
       * The first time we come we will set the lcp id and local
       * lcp id, after that only set it if the pair is smaller
       * than the previously smallest.
       */
      if ((c_restart_lcpId == 0) ||
          (c_restart_lcpId > restoredLcpId) ||
          (c_restart_lcpId == restoredLcpId &&
           c_restart_localLcpId > restoredLocalLcpId))
      {
        jam();
        c_restart_lcpId = restoredLcpId;
        c_restart_localLcpId = restoredLocalLcpId;
      }
      if ((restoredLcpId > c_restart_maxLcpId) ||
          ((restoredLcpId == c_restart_maxLcpId &&
            restoredLocalLcpId > c_restart_maxLocalLcpId)))
      {
        jam();
        c_restart_maxLcpId = restoredLcpId;
        c_restart_maxLocalLcpId = restoredLocalLcpId;
      }
      c_tup->disk_restart_lcp_id(fragptr.p->tabRef,
                                 fragptr.p->fragId,
                                 restoredLcpId,
                                 restoredLocalLcpId);
      // REDO execution only needs to start after the last GCI that the
      // restored LCP already contains.
      Uint32 startGci = maxGciCompleted + 1;
      fragptr.p->m_completed_gci = maxGciCompleted;
      move_start_gci_forward(signal, startGci);
    }
  }
  else
  {
    // Without a restore there must be no REDO log ranges to execute.
    ndbrequire(fragptr.p->srNoLognodes == 0);
  }
  if (fragptr.p->srNoLognodes == 0)
  {
    jam();
    /**
     * 3 potential reasons for getting here:
     * -------------------------------------
     * 1) We have removed at least one range and have no one left. This means
     *    we are now completed also with REDO logging and we can set the
     *    fragment state to active and also set it to enable logging.
     *
     * 2) We are restoring using SFR_COPY_FRAG and in this case afterRestore
     *    is set to 0 and number of log nodes is 0. So REDO logging is
     *    completed.
     *
     * 3) We are performing a node restart and no LCP was found, we ignore
     *    any REDO logging in this case and thus we have also here completed
     *    REDO logging.
     *
     * We need to be careful in setting up all fragments as if REDO logging
     * was done since we could potentially start up an LCP on the fragment
     * even before the copy fragment process is started.
     */
    sendSTART_FRAGCONF(signal);
  }

  // More fragments are queued: restore the next one before anything else.
  if (!c_lcp_waiting_fragments.isEmpty())
  {
    send_restore_lcp(signal);
    return;
  }

  if (c_lcp_restoring_fragments.isEmpty() &&
      cstartRecReq == SRR_START_REC_REQ_ARRIVED)
  {
    jam();
    /* ----------------------------------------------------------------
     *  WE HAVE ALSO RECEIVED AN INDICATION THAT NO MORE FRAGMENTS
     *  NEEDS RESTART.
     *  NOW IT IS TIME TO START EXECUTING THE UNDO LOG.
     * ----------------------------------------------------------------
     *  WE ARE NOW IN A POSITION TO ORDER TUP TO START
     *  EXECUTING THEIR UNDO LOGS. THIS MUST BE DONE BEFORE THE
     *  FRAGMENT LOGS CAN BE EXECUTED.
     * ---------------------------------------------------------------- */

    mark_end_of_lcp_restore(signal);

    /* Log Event denoting the completion of the LCP restore */
    signal->theData[0] = NDB_LE_LCPRestored;
    signal->theData[1] = c_restart_lcpId;
    sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);

    // One START_RECCONF is expected back; execSTART_RECCONF() counts
    // it in LCP record 0.
    csrExecUndoLogState = EULS_STARTED;
    lcpPtr.i = 0;
    ptrAss(lcpPtr, lcpRecord);
    lcpPtr.p->m_outstanding = 1;

    if (cstartType == NodeState::ST_INITIAL_NODE_RESTART)
    {
      jam();
      /**
       * Skip lgman undo...
       */
      // Send START_RECCONF to ourselves with LGMAN_REF as the sender
      // word, instead of asking LGMAN.
      signal->theData[0] = LGMAN_REF;
      sendSignal(reference(), GSN_START_RECCONF, signal, 1, JBB);
      return;
    }

    if (!isNdbMtLqh())
    {
      jam();
      signal->theData[0] = c_restart_lcpId;
      signal->theData[1] = c_restart_localLcpId;
      sendSignal(LGMAN_REF, GSN_START_RECREQ, signal, 2, JBB);
    }
    else
    {
      jam();
      // With multiple LQH instances the request goes to DBLQH_REF with
      // the target block (LGMAN) as a third word.
      signal->theData[0] = c_restart_lcpId;
      signal->theData[1] = c_restart_localLcpId;
      signal->theData[2] = LGMAN;
      sendSignal(DBLQH_REF, GSN_START_RECREQ, signal, 3, JBB);
    }
  }
}
23989 
/**
 * Kinds of local sysfile write. The value is passed as 'type' to
 * write_local_sysfile(), which stores it in the request's userPointer;
 * execWRITE_LOCAL_SYSFILE_CONF() dispatches on the userPointer of the
 * CONF signal.
 */
#define WLS_GCP_COMPLETE 0       /* written with lastWrite == 0 */
#define WLS_RESTART_COMPLETE 2   /* written with lastWrite == 1 */
#define WLS_GCP_COMPLETE_LATE 3  /* written with lastWrite == 0 */
23993 
23994 /**
23995  * The local sysfile ensures that we keep track of what is recoverable
23996  * locally. It doesn't have to be updated all the time since this is
23997  * the job of the distributed sysfile in DBDIH. It is used to keep
23998  * track of GCI restorable during restarts. We also validate that
23999  * GCI coming from DBDIH is recoverable. We need to maintain the
24000  * maximum restorable GCI until we have written that the restart
24001  * is completed into the local sysfile.
24002  *
24003  * After the restart is completed we need not update the local sysfile
24004  * anymore.
24005  */
24006 void
write_local_sysfile_gcp_complete(Signal * signal,Uint32 gci)24007 Dblqh::write_local_sysfile_gcp_complete(Signal *signal, Uint32 gci)
24008 {
24009   write_local_sysfile(signal, WLS_GCP_COMPLETE, gci);
24010 }
24011 
24012 void
write_local_sysfile_gcp_complete_late(Signal * signal,Uint32 gci)24013 Dblqh::write_local_sysfile_gcp_complete_late(Signal *signal, Uint32 gci)
24014 {
24015   write_local_sysfile(signal, WLS_GCP_COMPLETE_LATE, gci);
24016 }
24017 
24018 void
write_local_sysfile_restart_complete(Signal * signal)24019 Dblqh::write_local_sysfile_restart_complete(Signal *signal)
24020 {
24021   write_local_sysfile(signal, WLS_RESTART_COMPLETE, 0);
24022 }
24023 
24024 void
write_local_sysfile(Signal * signal,Uint32 type,Uint32 gci)24025 Dblqh::write_local_sysfile(Signal *signal, Uint32 type, Uint32 gci)
24026 {
24027   WriteLocalSysfileReq *req = (WriteLocalSysfileReq*)signal->getDataPtrSend();
24028   req->userPointer = type;
24029   req->userReference = reference();
24030   Uint32 nodeRestorableFlag;
24031   ndbrequire(is_first_instance());
24032   switch (type)
24033   {
24034     case WLS_GCP_COMPLETE:
24035     case WLS_GCP_COMPLETE_LATE:
24036     {
24037       jam();
24038       nodeRestorableFlag = ReadLocalSysfileReq::NODE_NOT_RESTORABLE_ON_ITS_OWN;
24039       req->lastWrite = 0;
24040       break;
24041     }
24042     case WLS_RESTART_COMPLETE:
24043     {
24044       jam();
24045       nodeRestorableFlag = ReadLocalSysfileReq::NODE_RESTORABLE_ON_ITS_OWN;
24046       req->lastWrite = 1;
24047       break;
24048     }
24049     default:
24050     {
24051       ndbabort();
24052       return; // Keep compiler quiet
24053     }
24054   }
24055   c_outstanding_write_local_sysfile = true;
24056   req->nodeRestorableOnItsOwn = nodeRestorableFlag;
24057   req->maxGCIRestorable = gci;
24058   sendSignal(NDBCNTR_REF, GSN_WRITE_LOCAL_SYSFILE_REQ, signal,
24059              WriteLocalSysfileReq::SignalLength, JBB);
24060 }
24061 
24062 void
execWRITE_LOCAL_SYSFILE_CONF(Signal * signal)24063 Dblqh::execWRITE_LOCAL_SYSFILE_CONF(Signal *signal)
24064 {
24065   WriteLocalSysfileConf *conf = (WriteLocalSysfileConf*)signal->getDataPtr();
24066   ndbrequire(is_first_instance());
24067   c_outstanding_write_local_sysfile = false;
24068   switch (conf->userPointer)
24069   {
24070     case WLS_GCP_COMPLETE:
24071     {
24072       jam();
24073       /**
24074        * This return signal is only sent to first instance and the only impact
24075        * of it is to send GCP_SAVEREF. All sending of RESTORABLE_GCI_REP is
24076        * taken care of by the NDBCNTR block in this case.
24077        */
24078       ndbrequire(cstartPhase != ZNIL);
24079       if (c_start_phase_9_waiting)
24080       {
24081         jam();
24082         /**
24083          * We have reached phase 9 during writing of the local sysfile.
24084          * We proceed immediately to update the local sysfile with the
24085          * fact that the restart is complete. After this we can synchronize
24086          * the GCP and report GCP_SAVECONF.
24087          */
24088         c_send_gcp_saveref_needed = false;
24089         write_local_sysfile_restart_complete(signal);
24090       }
24091       else
24092       {
24093         jam();
24094         write_local_sysfile_gcp_complete_done(signal);
24095       }
24096       return;
24097     }
24098     case WLS_GCP_COMPLETE_LATE:
24099     {
24100       jam();
24101       ndbrequire(cstartPhase != ZNIL);
24102       if (c_start_phase_9_waiting)
24103       {
24104         jam();
24105         /**
24106          * We have reached phase 9 during writing of local sysfile, proceed
24107          * to write local sysfile with the information that the restart is
24108          * completed before synching the GCP.
24109          */
24110         write_local_sysfile_restart_complete(signal);
24111       }
24112       else
24113       {
24114         jam();
24115         /**
24116          * We have completed the first LCP, but the restart isn't quite done
24117          * yet. We wrote the local sysfile to keep the maximum restartable
24118          * GCI value up to date. Now proceed to synch the GCP in this first
24119          * instance of LDM threads.
24120          */
24121         start_synch_gcp(signal);
24122       }
24123       return;
24124     }
24125     case WLS_RESTART_COMPLETE:
24126     {
24127       jam();
24128       /**
24129        * Restart is complete, we have written this into the local sysfile
24130        * and we are ready to proceed with the last phases of restart and
24131        * synching this GCP as requested.
24132        */
24133       ndbrequire(cstartPhase != ZNIL);
24134       ndbrequire(c_start_phase_9_waiting);
24135       g_eventLogger->info("Restart complete, updated local sysfile");
24136       write_local_sysfile_restart_complete_done(signal);
24137       start_synch_gcp(signal);
24138       return;
24139     }
24140     default:
24141     {
24142       ndbabort();
24143     }
24144   }
24145 }
24146 
24147 /* ***************> */
24148 /*  START_RECREQ  > */
24149 /* ***************> */
/**
 * Signal handler for START_RECREQ.
 *
 * Records the GCI limits for this restart (keepGci, lastCompletedGci,
 * newestGci) and the bitmask of nodes in the request (sr_nodes), and
 * marks that START_RECREQ has arrived.
 *
 * Afterwards one of two things can happen:
 *  - if no fragment is waiting for or undergoing restore, UNDO log
 *    execution is started (or skipped for an initial node restart);
 *  - if fragments are waiting but none is being restored, the restore
 *    of the first waiting fragment is started.
 * If a fragment restore is already in progress nothing more is done
 * here.
 *
 * @param signal  The received signal; its data is read as a StartRecReq
 *                and it is reused as send buffer.
 */
void Dblqh::execSTART_RECREQ(Signal* signal)
{
  CRASH_INSERTION(5027);

  jamEntry();
  StartRecReq * const req = (StartRecReq*)&signal->theData[0];

  if (signal->getNoOfSections() >= 1)
  {
    jam();
    // The node bitmask arrives in section 0; copy it into the request.
    Uint32 senderVersion = getNodeInfo(refToNode(signal->getSendersBlockRef())).m_version;
    ndbrequire(ndbd_send_node_bitmask_in_section(senderVersion));
    SegmentedSectionPtr ptr;
    SectionHandle handle(this,signal);
    handle.getSection(ptr, 0);
    ndbrequire(ptr.sz <= NdbNodeBitmask::Size);
    memset(req->sr_nodes, 0 , sizeof(req->sr_nodes));
    copy(req->sr_nodes, ptr);
    releaseSections(handle);
  }
  else
  {
    // No section: the bitmask is inline. Clear the words following the
    // first NdbNodeBitmask48::Size words.
    memset(req->sr_nodes + NdbNodeBitmask48::Size,
           0,
           _NDB_NBM_DIFF_BYTES);
  }

  cmasterDihBlockref = req->senderRef;

  // A restorable GCI set earlier (see execSTART_FRAGREQ) must agree
  // with the one given here.
  ndbrequire(crestartNewestGci == 0 ||
             crestartNewestGci == ZUNDEFINED_GCI_LIMIT ||
             crestartNewestGci == req->lastCompletedGci);

  crestartOldestGci = req->keepGci;
  crestartNewestGci = req->lastCompletedGci;
  cnewestGci = req->newestGci;
  cstartRecReqData = req->senderData;

  if (check_ndb_versions())
  {
    ndbrequire(crestartOldestGci <= crestartNewestGci);
  }

  ndbrequire(req->receivingNodeId == cownNodeid);

  cnewestCompletedGci = cnewestGci;
  c_backup->setRestorableGci(crestartNewestGci);
  cstartRecReq = SRR_START_REC_REQ_ARRIVED; // StartRecReq has arrived

  if (signal->getLength() == StartRecReq::SignalLength)
  {
    jam();
    NdbNodeBitmask tmp;
    tmp.assign(NdbNodeBitmask::Size, req->sr_nodes);
    if (!tmp.equal(m_sr_nodes))
    {
      // Log when the node set differs from what was known before.
      char buf0[NdbNodeBitmask::TextLength + 1];
      char buf1[NdbNodeBitmask::TextLength + 1];
      ndbout_c("execSTART_RECREQ changing srnodes from %s to %s",
               m_sr_nodes.getText(buf0),
               tmp.getText(buf1));

    }
    m_sr_nodes.assign(NdbNodeBitmask::Size, req->sr_nodes);
  }
  else
  {
    jam();
    // Signal of another length: the stored senderData is reset to RNIL.
    cstartRecReqData = RNIL;
  }

  for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
    ptrAss(logPartPtr, logPartRecord);
    logPartPtr.p->logPartNewestCompletedGCI = cnewestCompletedGci;
  }//for
  /* ------------------------------------------------------------------------
   *   WE HAVE TO SET THE OLDEST AND THE NEWEST GLOBAL CHECKPOINT IDENTITY
   *   THAT WILL SURVIVE THIS SYSTEM RESTART. THIS IS NEEDED SO THAT WE CAN
   *   SET THE LOG HEAD AND LOG TAIL PROPERLY BEFORE STARTING THE SYSTEM AGAIN.
   *   WE ALSO NEED TO SET CNEWEST_GCI TO ENSURE THAT LOG RECORDS ARE EXECUTED
   *   WITH A PROPER GCI.
   *------------------------------------------------------------------------ */

  DEB_LCP(("(%u)START_RECREQ: nodeRestorableGci: %u",
          instance(),
          crestartNewestGci));
  if (c_lcp_restoring_fragments.isEmpty() &&
      c_lcp_waiting_fragments.isEmpty())
  {
    jam();

    // No fragment is waiting for or undergoing restore: start the UNDO
    // log execution phase.
    mark_end_of_lcp_restore(signal);

    csrExecUndoLogState = EULS_STARTED;

    // One START_RECCONF is expected back; execSTART_RECCONF() counts
    // it in LCP record 0.
    lcpPtr.i = 0;
    ptrAss(lcpPtr, lcpRecord);
    lcpPtr.p->m_outstanding = 1;

    if (cstartType == NodeState::ST_INITIAL_NODE_RESTART)
    {
      jam();
      /**
       * Skip lgman undo...
       */
      signal->theData[0] = LGMAN_REF;
      sendSignal(reference(), GSN_START_RECCONF, signal, 1, JBB);
      return;
    }

    if (c_restart_lcpId == 0 &&
        c_restart_localLcpId == 0 &&
        m_restart_local_latest_lcp_id > 1)
    {
      /**
       * Not a single fragment was restored using an LCP. This should only
       * happen in nodes added in new node groups since SYSTAB_0 should be
       * in all original nodes from the initial start.
       */
      jam();
      c_restart_lcpId = m_restart_local_latest_lcp_id - 1;
    }
    if (!isNdbMtLqh())
    {
      jam();
      signal->theData[0] = c_restart_lcpId;
      signal->theData[1] = c_restart_localLcpId;
      sendSignal(LGMAN_REF, GSN_START_RECREQ, signal, 2, JBB);
    }
    else
    {
      jam();
      // With multiple LQH instances the request goes to DBLQH_REF with
      // the target block (LGMAN) as a third word.
      signal->theData[0] = c_restart_lcpId;
      signal->theData[1] = c_restart_localLcpId;
      signal->theData[2] = LGMAN;
      sendSignal(DBLQH_REF, GSN_START_RECREQ, signal, 3, JBB);
    }
  }//if
  if (c_lcp_restoring_fragments.isEmpty() &&
      !c_lcp_waiting_fragments.isEmpty())
  {
    jam();
    /**
     * This covers the upgrade case where we now know the nodeRestorableGci
     * for our node and thus are prepared to move on with restoring fragments.
     * When the master is on a newer version we don't need to wait for this to
     * happen since there we send this information along with START_FRAGREQ
     * already.
     *
     * We can come here without any START_FRAGREQ being sent.
     */
    send_restore_lcp(signal);
  }
}//Dblqh::execSTART_RECREQ()
24304 
24305 /* ***************>> */
24306 /*  START_RECCONF  > */
24307 /* ***************>> */
execSTART_RECCONF(Signal * signal)24308 void Dblqh::execSTART_RECCONF(Signal* signal)
24309 {
24310   jamEntry();
24311   lcpPtr.i = 0;
24312   ptrAss(lcpPtr, lcpRecord);
24313   ndbrequire(csrExecUndoLogState == EULS_STARTED);
24314   ndbrequire(lcpPtr.p->m_outstanding);
24315 
24316   Uint32 sender= signal->theData[0];
24317 
24318   if (ERROR_INSERTED(5055))
24319   {
24320     CLEAR_ERROR_INSERT_VALUE;
24321   }
24322 
24323   lcpPtr.p->m_outstanding--;
24324   if(lcpPtr.p->m_outstanding)
24325   {
24326     jam();
24327     return;
24328   }
24329 
24330   switch(refToBlock(sender)){
24331   case TSMAN:
24332     jam();
24333     break;
24334   case LGMAN:
24335     jam();
24336     c_tup->verify_undo_log_execution();
24337     lcpPtr.p->m_outstanding++;
24338     if (!isNdbMtLqh())
24339     {
24340       jam();
24341       signal->theData[0] = c_restart_lcpId;
24342       sendSignal(TSMAN_REF, GSN_START_RECREQ, signal, 1, JBB);
24343     }
24344     else
24345     {
24346       jam();
24347       signal->theData[0] = c_restart_lcpId;
24348       signal->theData[1] = 0;
24349       signal->theData[2] = TSMAN;
24350       sendSignal(DBLQH_REF, GSN_START_RECREQ, signal, 3, JBB);
24351     }
24352     return;
24353     break;
24354   default:
24355     ndbabort();
24356   }
24357 
24358   jam();
24359   signal->theData[0] = c_restart_maxLcpId;
24360   signal->theData[1] = c_restart_maxLocalLcpId;
24361   sendSignal(NDBCNTR_REF, GSN_SET_LOCAL_LCP_ID_REQ, signal,
24362              2, JBB);
24363 }
24364 
24365 void
execSET_LOCAL_LCP_ID_CONF(Signal * signal)24366 Dblqh::execSET_LOCAL_LCP_ID_CONF(Signal *signal)
24367 {
24368   jam();
24369   c_restart_maxLcpId = signal->theData[0];
24370   c_restart_maxLocalLcpId = signal->theData[1];
24371   m_curr_lcp_id = c_restart_maxLcpId;
24372   m_curr_local_lcp_id = c_restart_maxLocalLcpId;
24373 
24374   csrExecUndoLogState = EULS_COMPLETED;
24375 
24376   g_eventLogger->info("LDM(%u): Completed DD Undo log application",
24377                       instance());
24378 
24379   sendLOCAL_RECOVERY_COMPLETE_REP(signal,
24380            LocalRecoveryCompleteRep::UNDO_DD_COMPLETED);
24381   if (cstartType == NodeState::ST_INITIAL_NODE_RESTART)
24382   {
24383     jam();
24384     cstartRecReq = SRR_REDO_COMPLETE; // REDO complete
24385 
24386     rebuildOrderedIndexes(signal, 0);
24387     return;
24388   }
24389   c_executing_redo_log = 1;
24390   g_eventLogger->info("LDM(%u): Starting REDO log execution"
24391                       " phase %u",
24392                       instance(),
24393                       csrPhasesCompleted);
24394   startExecSr(signal);
24395 }
24396 
sendLOCAL_RECOVERY_COMPLETE_REP(Signal * signal,LocalRecoveryCompleteRep::PhaseIds phaseId)24397 void Dblqh::sendLOCAL_RECOVERY_COMPLETE_REP(Signal *signal,
24398                                   LocalRecoveryCompleteRep::PhaseIds phaseId)
24399 {
24400   LocalRecoveryCompleteRep *rep =
24401     (LocalRecoveryCompleteRep*)signal->getDataPtrSend();
24402 
24403   rep->nodeId = getOwnNodeId();
24404   rep->phaseId = phaseId;
24405   if (isNdbMtLqh())
24406   {
24407     jam();
24408     rep->senderData = cstartRecReqData;
24409     rep->instanceId = instance();
24410     sendSignal(DBLQH_REF, GSN_LOCAL_RECOVERY_COMP_REP, signal,
24411                LocalRecoveryCompleteRep::SignalLengthLocal, JBB);
24412   }
24413   else
24414   {
24415     jam();
24416     sendSignal(cmasterDihBlockref, GSN_LOCAL_RECOVERY_COMP_REP, signal,
24417                LocalRecoveryCompleteRep::SignalLengthMaster, JBB);
24418   }
24419 }
24420 
24421 /* ***************> */
24422 /*  START_RECREF  > */
24423 /* ***************> */
execSTART_RECREF(Signal * signal)24424 void Dblqh::execSTART_RECREF(Signal* signal)
24425 {
24426   jamEntry();
24427   ndbabort();
24428 }//Dblqh::execSTART_RECREF()
24429 
24430 void
rebuildOrderedIndexes(Signal * signal,Uint32 tableId)24431 Dblqh::rebuildOrderedIndexes(Signal* signal, Uint32 tableId)
24432 {
24433   jamEntry();
24434 
24435   if (tableId == 0)
24436   {
24437     jam();
24438     g_eventLogger->info("LDM(%u): Starting to rebuild ordered indexes",
24439                         instance());
24440 
24441     sendLOCAL_RECOVERY_COMPLETE_REP(signal,
24442                     LocalRecoveryCompleteRep::EXECUTE_REDO_LOG_COMPLETED);
24443   }
24444   if (tableId >= ctabrecFileSize)
24445   {
24446     jam();
24447 
24448     for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++)
24449     {
24450       jam();
24451       ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
24452       LogFileRecordPtr logFile;
24453       logFile.i = logPartPtr.p->currentLogfile;
24454       ptrCheckGuard(logFile, clogFileFileSize, logFileRecord);
24455 
24456       LogPosition head = { logFile.p->fileNo, logFile.p->currentMbyte };
24457       LogPosition tail = { logPartPtr.p->logTailFileNo,
24458                            logPartPtr.p->logTailMbyte};
24459       Uint64 free_mb = free_log(head,
24460                                 tail,
24461                                 logPartPtr.p->noLogFiles,
24462                                 clogFileSize);
24463       Uint32 committed_mbytes = get_committed_mbytes(logPartPtr.p);
24464       if (free_mb <=
24465           (c_free_mb_tail_problem_limit + committed_mbytes))
24466       {
24467         jam();
24468         update_log_problem(signal, logPartPtr,
24469                            LogPartRecord::P_TAIL_PROBLEM, true);
24470       }
24471     }
24472 
24473     if (!isNdbMtLqh())
24474     {
24475       /**
24476        * There should be no disk-ops in flight here...check it
24477        */
24478       signal->theData[0] = 12003;
24479       sendSignal(LGMAN_REF, GSN_DUMP_STATE_ORD, signal, 1, JBB);
24480     }
24481 
24482     StartRecConf * conf = (StartRecConf*)signal->getDataPtrSend();
24483     conf->startingNodeId = getOwnNodeId();
24484     conf->senderData = cstartRecReqData;
24485     sendSignal(cmasterDihBlockref, GSN_START_RECCONF, signal,
24486                StartRecConf::SignalLength, JBB);
24487 
24488     g_eventLogger->info("LDM(%u): We have completed restoring our"
24489                         " fragments and executed REDO log and rebuilt"
24490                         " ordered indexes",
24491                         instance());
24492     return;
24493   }
24494 
24495   tabptr.i = tableId;
24496   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
24497   if (! (DictTabInfo::isOrderedIndex(tabptr.p->tableType) &&
24498          tabptr.p->tableStatus == Tablerec::TABLE_DEFINED))
24499   {
24500     jam();
24501     signal->theData[0] = ZREBUILD_ORDERED_INDEXES;
24502     signal->theData[1] = tableId + 1;
24503     sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
24504     return;
24505   }
24506 
24507   signal->theData[0] = NDB_LE_RebuildIndex;
24508   signal->theData[1] = instance();
24509   signal->theData[2] = tableId;
24510   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
24511 
24512   BuildIndxImplReq* const req = (BuildIndxImplReq*)signal->getDataPtrSend();
24513   req->senderRef = reference();
24514   req->senderData = tableId;
24515   req->requestType = BuildIndxImplReq::RF_BUILD_OFFLINE;
24516   req->buildId = 0;     // not yet..
24517   req->buildKey = 0;    // ..in use
24518   req->transId = 0;
24519   req->indexType = tabptr.p->tableType;
24520   req->indexId = tableId;
24521   req->tableId = tabptr.p->primaryTableId;
24522   req->parallelism = 0;
24523   sendSignal(calcInstanceBlockRef(DBTUP), GSN_BUILD_INDX_IMPL_REQ, signal,
24524              BuildIndxImplReq::SignalLength, JBB);
24525 }
24526 
24527 void
execBUILD_INDX_IMPL_REF(Signal * signal)24528 Dblqh::execBUILD_INDX_IMPL_REF(Signal * signal)
24529 {
24530   jamEntry();
24531   ndbabort(); // TODO error message
24532 }
24533 
24534 void
execBUILD_INDX_IMPL_CONF(Signal * signal)24535 Dblqh::execBUILD_INDX_IMPL_CONF(Signal* signal)
24536 {
24537   jamEntry();
24538   BuildIndxImplConf * conf = (BuildIndxImplConf*)signal->getDataPtr();
24539   Uint32 tableId = conf->senderData;
24540   rebuildOrderedIndexes(signal, tableId + 1);
24541   g_eventLogger->info("LDM(%u): index id %u rebuild done",
24542                       instance(),
24543                       tableId);
24544 }
24545 
24546 /* ***************>> */
24547 /*  START_EXEC_SR  > */
24548 /* ***************>> */
execSTART_EXEC_SR(Signal * signal)24549 void Dblqh::execSTART_EXEC_SR(Signal* signal)
24550 {
24551   jamEntry();
24552   fragptr.i = signal->theData[0];
24553   Uint32 next = RNIL;
24554 
24555   if (fragptr.i == RNIL)
24556   {
24557     jam();
24558     /* ----------------------------------------------------------------------
24559      *    NO MORE FRAGMENTS TO START EXECUTING THE LOG ON.
24560      *    SEND EXEC_SRREQ TO ALL LQH TO INDICATE THAT THIS NODE WILL
24561      *    NOT REQUEST ANY MORE FRAGMENTS TO EXECUTE THE FRAGMENT LOG ON.
24562      * ----------------------------------------------------------------------
24563      *    WE NEED TO SEND THOSE SIGNALS EVEN IF WE HAVE NOT REQUESTED
24564      *    ANY FRAGMENTS PARTICIPATE IN THIS PHASE.
24565      * --------------------------------------------------------------------- */
24566     signal->theData[0] = cownNodeid;
24567     if (!isNdbMtLqh())
24568     {
24569       jam();
24570       NodeReceiverGroup rg(DBLQH, m_sr_nodes);
24571       sendSignal(rg, GSN_EXEC_SRREQ, signal, 1, JBB);
24572     }
24573     else
24574     {
24575       jam();
24576       const Uint32 sz = NdbNodeBitmask::Size;
24577       m_sr_nodes.copyto(sz, &signal->theData[1]);
24578       sendSignal(DBLQH_REF, GSN_EXEC_SRREQ, signal, 1 + sz, JBB);
24579     }
24580     return;
24581   } else {
24582     jam();
24583     c_lcp_complete_fragments.getPtr(fragptr);
24584     next = fragptr.p->nextList;
24585 
24586     if (fragptr.p->srNoLognodes > csrPhasesCompleted)
24587     {
24588       jam();
24589       cnoOutstandingExecFragReq++;
24590 
24591       Uint32 index = csrPhasesCompleted;
24592       arrGuard(index, MAX_LOG_EXEC);
24593       Uint32 Tnode = fragptr.p->srLqhLognode[index];
24594       Uint32 instanceKey = fragptr.p->lqhInstanceKey;
24595       BlockReference ref = numberToRef(DBLQH, instanceKey, Tnode);
24596       fragptr.p->srStatus = Fragrecord::SS_STARTED;
24597 
24598       /* --------------------------------------------------------------------
24599        *  SINCE WE CAN HAVE SEVERAL LQH NODES PER FRAGMENT WE CALCULATE
24600        *  THE LQH POINTER IN SUCH A WAY THAT WE CAN DEDUCE WHICH OF THE
24601        *  LQH NODES THAT HAS RESPONDED WHEN EXEC_FRAGCONF IS RECEIVED.
24602        * ------------------------------------------------------------------- */
24603       ExecFragReq * const execFragReq = (ExecFragReq *)&signal->theData[0];
24604       execFragReq->userPtr = fragptr.i;
24605       execFragReq->userRef = cownref;
24606       execFragReq->tableId = fragptr.p->tabRef;
24607       execFragReq->fragId = fragptr.p->fragId;
24608       execFragReq->startGci = fragptr.p->srStartGci[index];
24609       execFragReq->lastGci = fragptr.p->srLastGci[index];
24610       execFragReq->dst = ref;
24611 
24612       if (isNdbMtLqh())
24613       {
24614         jam();
24615         // send via local proxy
24616         sendSignal(DBLQH_REF, GSN_EXEC_FRAGREQ, signal,
24617                    ExecFragReq::SignalLength, JBB);
24618       }
24619       else
24620       {
24621         jam();
24622         // send via remote proxy
24623         sendSignal(numberToRef(DBLQH, refToNode(ref)), GSN_EXEC_FRAGREQ, signal,
24624                    ExecFragReq::SignalLength, JBB);
24625       }
24626     }
24627     signal->theData[0] = next;
24628     sendSignal(cownref, GSN_START_EXEC_SR, signal, 1, JBB);
24629   }//if
24630   return;
24631 }//Dblqh::execSTART_EXEC_SR()
24632 
24633 /* ***************> */
24634 /*  EXEC_FRAGREQ  > */
24635 /* ***************> */
24636 /* --------------------------------------------------------------------------
24637  *  THIS SIGNAL IS USED TO REQUEST THAT A FRAGMENT PARTICIPATES IN EXECUTING
24638  *  THE LOG IN THIS NODE.
24639  * ------------------------------------------------------------------------- */
execEXEC_FRAGREQ(Signal * signal)24640 void Dblqh::execEXEC_FRAGREQ(Signal* signal)
24641 {
24642   ExecFragReq * const execFragReq = (ExecFragReq *)&signal->theData[0];
24643   jamEntry();
24644   tabptr.i = execFragReq->tableId;
24645   Uint32 fragId = execFragReq->fragId;
24646   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
24647   ndbrequire(getFragmentrec(signal, fragId));
24648 
24649   ndbrequire(fragptr.p->execSrNoReplicas < MAX_REPLICAS);
24650   fragptr.p->execSrBlockref[fragptr.p->execSrNoReplicas] = execFragReq->userRef;
24651   fragptr.p->execSrUserptr[fragptr.p->execSrNoReplicas] = execFragReq->userPtr;
24652   fragptr.p->execSrStartGci[fragptr.p->execSrNoReplicas] = execFragReq->startGci;
24653   fragptr.p->execSrLastGci[fragptr.p->execSrNoReplicas] = execFragReq->lastGci;
24654   fragptr.p->execSrStatus = Fragrecord::ACTIVE;
24655   fragptr.p->execSrNoReplicas++;
24656   cnoFragmentsExecSr++;
24657   return;
24658 }//Dblqh::execEXEC_FRAGREQ()
24659 
sendExecFragRefLab(Signal * signal)24660 void Dblqh::sendExecFragRefLab(Signal* signal)
24661 {
24662   ExecFragReq * const execFragReq = (ExecFragReq *)&signal->theData[0];
24663   BlockReference retRef = execFragReq->userRef;
24664   Uint32 retPtr = execFragReq->userPtr;
24665 
24666   signal->theData[0] = retPtr;
24667   signal->theData[1] = terrorCode;
24668   sendSignal(retRef, GSN_EXEC_FRAGREF, signal, 2, JBB);
24669   return;
24670 }//Dblqh::sendExecFragRefLab()
24671 
sendSTART_FRAGCONF(Signal * signal)24672 void Dblqh::sendSTART_FRAGCONF(Signal *signal)
24673 {
24674   /**
24675    * This signal is ignored in DIH currently, but we still send it to enable
24676    * future functionality beased on this if needed.
24677    *
24678    * This method is called when we are sure that we have completed any REDO
24679    * log execution needed.
24680    */
24681   fragptr.p->logFlag = Fragrecord::STATE_TRUE;
24682   fragptr.p->fragStatus = Fragrecord::FSACTIVE;
24683 
24684   signal->theData[0] = fragptr.p->srUserptr;
24685   signal->theData[1] = cownNodeid;
24686   sendSignal(fragptr.p->srBlockref, GSN_START_FRAGCONF, signal, 2, JBB);
24687 }
24688 
24689 /* ***************>> */
24690 /*  EXEC_FRAGCONF  > */
24691 /* ***************>> */
execEXEC_FRAGCONF(Signal * signal)24692 void Dblqh::execEXEC_FRAGCONF(Signal* signal)
24693 {
24694   jamEntry();
24695   fragptr.i = signal->theData[0];
24696   c_fragment_pool.getPtr(fragptr);
24697   fragptr.p->srStatus = Fragrecord::SS_COMPLETED;
24698 
24699   ndbrequire(cnoOutstandingExecFragReq);
24700   cnoOutstandingExecFragReq--;
24701   if (fragptr.p->srNoLognodes == csrPhasesCompleted + 1)
24702   {
24703     jam();
24704     sendSTART_FRAGCONF(signal);
24705   }
24706 }//Dblqh::execEXEC_FRAGCONF()
24707 
24708 /* ***************> */
24709 /*  EXEC_FRAGREF  > */
24710 /* ***************> */
execEXEC_FRAGREF(Signal * signal)24711 void Dblqh::execEXEC_FRAGREF(Signal* signal)
24712 {
24713   jamEntry();
24714   terrorCode = signal->theData[1];
24715   systemErrorLab(signal, __LINE__);
24716   return;
24717 }//Dblqh::execEXEC_FRAGREF()
24718 
24719 /* *************** */
24720 /*  EXEC_SRCONF  > */
24721 /* *************** */
execEXEC_SRCONF(Signal * signal)24722 void Dblqh::execEXEC_SRCONF(Signal* signal)
24723 {
24724   jamEntry();
24725   Uint32 nodeId = signal->theData[0];
24726   arrGuard(nodeId, MAX_NDB_NODES);
24727   g_eventLogger->info("LDM(%u): Node %u completed LDM restart"
24728                       " phase 3",
24729                       instance(),
24730                       nodeId);
24731   m_sr_exec_sr_conf.set(nodeId);
24732 
24733   if (!m_sr_nodes.equal(m_sr_exec_sr_conf))
24734   {
24735     jam();
24736     /* ------------------------------------------------------------------
24737      *  ALL NODES HAVE NOT REPORTED COMPLETION OF EXECUTING FRAGMENT
24738      *  LOGS YET.
24739      * ----------------------------------------------------------------- */
24740     return;
24741   }
24742 
24743   if (cnoOutstandingExecFragReq != 0)
24744   {
24745     /**
24746      * This should now have been fixed!
24747      *   but could occur during upgrade
24748      * old: wl4391_todo workaround until timing fixed
24749      */
24750     jam();
24751     m_sr_exec_sr_conf.clear(nodeId);
24752     ndbout << "delay: reqs=" << cnoOutstandingExecFragReq << endl;
24753     ndbabort();
24754     sendSignalWithDelay(reference(), GSN_EXEC_SRCONF,
24755                         signal, 10, signal->getLength());
24756     return;
24757   }
24758 
24759   /* ------------------------------------------------------------------------
24760    *  CLEAR NODE SYSTEM RESTART EXECUTION STATE TO PREPARE FOR NEXT PHASE OF
24761    *  LOG EXECUTION.
24762    * ----------------------------------------------------------------------- */
24763   m_sr_exec_sr_conf.clear();
24764   cnoFragmentsExecSr = 0;
24765 
24766   /* ------------------------------------------------------------------------
24767    *  NOW CHECK IF ALL FRAGMENTS IN THIS PHASE HAVE COMPLETED. IF SO START THE
24768    *  NEXT PHASE.
24769    * ----------------------------------------------------------------------- */
24770   ndbrequire(cnoOutstandingExecFragReq == 0);
24771 
24772   execSrCompletedLab(signal);
24773   return;
24774 }//Dblqh::execEXEC_SRCONF()
24775 
execSrCompletedLab(Signal * signal)24776 void Dblqh::execSrCompletedLab(Signal* signal)
24777 {
24778   csrPhasesCompleted++;
24779 
24780   /* ------------------------------------------------------------------------
24781    *  ALL FRAGMENTS WERE COMPLETED. THIS PHASE IS COMPLETED. IT IS NOW TIME TO
24782    *  START THE NEXT PHASE.
24783    * ----------------------------------------------------------------------- */
24784   if (csrPhasesCompleted >= MAX_LOG_EXEC) {
24785     jam();
24786     /* ----------------------------------------------------------------------
24787      *  THIS WAS THE LAST PHASE. WE HAVE NOW COMPLETED THE EXECUTION THE
24788      *  FRAGMENT LOGS IN ALL NODES. BEFORE WE SEND START_RECCONF TO THE
24789      *  MASTER DIH TO INDICATE A COMPLETED SYSTEM RESTART IT IS NECESSARY
24790      *  TO FIND THE HEAD AND THE TAIL OF THE LOG WHEN NEW OPERATIONS START
24791      *  TO COME AGAIN.
24792      *
24793      * THE FIRST STEP IS TO FIND THE HEAD AND TAIL MBYTE OF EACH LOG PART.
24794      * TO DO THIS WE REUSE THE CONTINUEB SIGNAL SR_LOG_LIMITS. THEN WE
24795      * HAVE TO FIND THE ACTUAL PAGE NUMBER AND PAGE INDEX WHERE TO
24796      * CONTINUE WRITING THE LOG AFTER THE SYSTEM RESTART.
24797      * --------------------------------------------------------------------- */
24798     g_eventLogger->info("LDM(%u): REDO log execution completed, now"
24799                         " finding the new log head + tail",
24800                         instance());
24801 
24802     for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
24803       jam();
24804       ptrAss(logPartPtr, logPartRecord);
24805       logPartPtr.p->logPartState = LogPartRecord::SR_FOURTH_PHASE_STARTED;
24806       logPartPtr.p->logLastGci = crestartNewestGci;
24807       logPartPtr.p->logStartGci = crestartOldestGci;
24808       logPartPtr.p->logExecState = LogPartRecord::LES_SEARCH_STOP;
24809       if (logPartPtr.p->headFileNo == ZNIL) {
24810         jam();
24811 	/* -----------------------------------------------------------------
24812 	 *  IF WE HAVEN'T FOUND ANY HEAD OF THE LOG THEN WE ARE IN SERIOUS
24813 	 *  PROBLEM.  THIS SHOULD NOT OCCUR. IF IT OCCURS ANYWAY THEN WE
24814 	 *  HAVE TO FIND A CURE FOR THIS PROBLEM.
24815 	 * ----------------------------------------------------------------- */
24816         systemErrorLab(signal, __LINE__);
24817         return;
24818       }//if
24819 
24820       if (DEBUG_REDO)
24821       {
24822         ndbout_c("part: %u srLogLimits SR_FOURTH_PHASE %u-%u (file: %u mb: %u)",
24823                  logPartPtr.p->logPartNo,
24824                  logPartPtr.p->logStartGci,
24825                  logPartPtr.p->logLastGci,
24826                  logPartPtr.p->lastLogfile,
24827                  logPartPtr.p->lastMbyte);
24828       }
24829 
24830       signal->theData[0] = ZSR_LOG_LIMITS;
24831       signal->theData[1] = logPartPtr.i;
24832       signal->theData[2] = logPartPtr.p->lastLogfile;
24833       signal->theData[3] = logPartPtr.p->lastMbyte;
24834       sendSignal(cownref, GSN_CONTINUEB, signal, 4, JBB);
24835     }//for
24836     return;
24837   }
24838   else
24839   {
24840     jam();
24841     /* ----------------------------------------------------------------------
24842      *   THERE ARE YET MORE PHASES TO RESTART.
24843      *   WE MUST INITIALISE DATA FOR NEXT PHASE AND SEND START SIGNAL.
24844      * --------------------------------------------------------------------- */
24845     csrPhaseStarted = ZSR_PHASE1_COMPLETED; // Set correct state first...
24846     g_eventLogger->info("LDM(%u): Starting REDO log execution"
24847                         " phase %u",
24848                         instance(),
24849                         csrPhasesCompleted);
24850     startExecSr(signal);
24851   }//if
24852   return;
24853 }//Dblqh::execSrCompletedLab()
24854 
24855 /* ************>> */
24856 /*  EXEC_SRREQ  > */
24857 /* ************>> */
execEXEC_SRREQ(Signal * signal)24858 void Dblqh::execEXEC_SRREQ(Signal* signal)
24859 {
24860   jamEntry();
24861   Uint32 nodeId = signal->theData[0];
24862   ndbrequire(nodeId < MAX_NDB_NODES);
24863   g_eventLogger->info("LDM(%u): Node %u ready to execute REDO log",
24864                       instance(),
24865                       nodeId);
24866   m_sr_exec_sr_req.set(nodeId);
24867   if (!m_sr_exec_sr_req.equal(m_sr_nodes))
24868   {
24869     jam();
24870     return;
24871   }
24872 
24873   /* ------------------------------------------------------------------------
24874    *  CLEAR NODE SYSTEM RESTART STATE TO PREPARE FOR NEXT PHASE OF LOG
24875    *  EXECUTION
24876    * ----------------------------------------------------------------------- */
24877   m_sr_exec_sr_req.clear();
24878 
24879   g_eventLogger->info("LDM(%u): All starting nodes ready"
24880                       " to execute REDO log.  Phases completed = %u",
24881                       instance(),
24882                       csrPhasesCompleted);
24883 
24884   if (csrPhasesCompleted != 0) {
24885     /* ----------------------------------------------------------------------
24886      *       THE FIRST PHASE MUST ALWAYS EXECUTE THE LOG.
24887      * --------------------------------------------------------------------- */
24888     if (cnoFragmentsExecSr == 0) {
24889       jam();
24890       /* --------------------------------------------------------------------
24891        *  THERE WERE NO FRAGMENTS THAT NEEDED TO EXECUTE THE LOG IN THIS PHASE.
24892        * ------------------------------------------------------------------- */
24893       srPhase3Comp(signal);
24894       return;
24895     }//if
24896   }//if
24897   /* ------------------------------------------------------------------------
24898    *  NOW ALL NODES HAVE SENT ALL EXEC_FRAGREQ. NOW WE CAN START EXECUTING THE
24899    *  LOG FROM THE MINIMUM GCI NEEDED UNTIL THE MAXIMUM GCI NEEDED.
24900    *
24901    *  WE MUST FIRST CHECK IF THE FIRST PHASE OF THE SYSTEM RESTART HAS BEEN
24902    *  COMPLETED. THIS HANDLING IS PERFORMED IN THE FILE SYSTEM MODULE
24903    * ----------------------------------------------------------------------- */
24904 
24905   g_eventLogger->info("LDM(%u):"
24906                       "Ready to start execute REDO log phase,"
24907                       " collect REDO log execution info phase completed",
24908                       instance());
24909 
24910   signal->theData[0] = ZSR_PHASE3_START;
24911   signal->theData[1] = ZSR_PHASE2_COMPLETED;
24912   sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
24913   return;
24914 }//Dblqh::execEXEC_SRREQ()
24915 
24916 /* ######################################################################### */
24917 /*       SYSTEM RESTART PHASE THREE MODULE                                   */
24918 /*       THIS MODULE IS A SUB-MODULE OF THE FILE SYSTEM HANDLING.            */
24919 /*                                                                           */
24920 /* THIS MODULE IS CONCERNED WITH EXECUTING THE FRAGMENT LOG. IT DOES ALSO    */
24921 /* CONTAIN SIGNAL RECEPTIONS LQHKEYCONF AND LQHKEYREF SINCE LQHKEYREQ IS USED*/
24922 /* TO EXECUTE THE LOG RECORDS.                                               */
24923 /*                                                                           */
24924 /* BEFORE IT STARTS IT HAS BEEN DECIDED WHERE TO START AND WHERE TO STOP     */
24925 /* READING THE FRAGMENT LOG BY USING THE INFORMATION ABOUT GCI DISCOVERED IN */
24926 /* PHASE ONE OF THE SYSTEM RESTART.                                          */
24927 /* ######################################################################### */
24928 /*---------------------------------------------------------------------------*/
/* PHASE THREE OF THE SYSTEM RESTART CAN NOW START. ONE OF THE PHASES HAS    */
24930 /* COMPLETED.                                                                */
24931 /*---------------------------------------------------------------------------*/
srPhase3Start(Signal * signal)24932 void Dblqh::srPhase3Start(Signal* signal)
24933 {
24934   UintR tsrPhaseStarted;
24935 
24936   jamEntry();
24937 
24938   tsrPhaseStarted = signal->theData[1];
24939   if (csrPhaseStarted == ZSR_NO_PHASE_STARTED) {
24940     jam();
24941     csrPhaseStarted = tsrPhaseStarted;
24942     return;
24943   }//if
24944   ndbrequire(csrPhaseStarted != tsrPhaseStarted);
24945   ndbrequire(csrPhaseStarted != ZSR_BOTH_PHASES_STARTED);
24946 
24947   csrPhaseStarted = ZSR_BOTH_PHASES_STARTED;
24948   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
24949     jam();
24950     ptrAss(logPartPtr, logPartRecord);
24951     logPartPtr.p->logPartState = LogPartRecord::SR_THIRD_PHASE_STARTED;
24952     logPartPtr.p->logStartGci = (UintR)-1;
24953     if (csrPhasesCompleted == 0) {
24954       jam();
24955       /* --------------------------------------------------------------------
24956        *  THE FIRST PHASE WE MUST ENSURE THAT IT REACHES THE END OF THE LOG.
24957        * ------------------------------------------------------------------- */
24958       logPartPtr.p->logLastGci = crestartNewestGci;
24959     } else {
24960       jam();
24961       logPartPtr.p->logLastGci = 2;
24962     }//if
24963   }//for
24964 
24965   jam();
24966   c_lcp_complete_fragments.first(fragptr);
24967   signal->theData[0] = ZSR_GCI_LIMITS;
24968   signal->theData[1] = fragptr.i;
24969   sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
24970   return;
24971 }//Dblqh::srPhase3Start()
24972 
24973 /* --------------------------------------------------------------------------
 *   WE NOW NEED TO FIND THE LIMITS WITHIN WHICH TO EXECUTE
24975  *   THE FRAGMENT LOG
24976  * ------------------------------------------------------------------------- */
srGciLimits(Signal * signal)24977 void Dblqh::srGciLimits(Signal* signal)
24978 {
24979   jamEntry();
24980   fragptr.i = signal->theData[0];
24981   Uint32 loopCount = 0;
24982   logPartPtr.i = 0;
24983   ptrAss(logPartPtr, logPartRecord);
24984   while (fragptr.i != RNIL){
24985     jam();
24986     c_lcp_complete_fragments.getPtr(fragptr);
24987     ndbrequire(fragptr.p->execSrNoReplicas - 1 < MAX_REPLICAS);
24988     for (Uint32 i = 0; i < fragptr.p->execSrNoReplicas; i++) {
24989       jam();
24990       if (fragptr.p->execSrStartGci[i] < logPartPtr.p->logStartGci) {
24991 	jam();
24992 	logPartPtr.p->logStartGci = fragptr.p->execSrStartGci[i];
24993       }//if
24994       if (fragptr.p->execSrLastGci[i] > logPartPtr.p->logLastGci) {
24995 	jam();
24996         /**
24997          * We cannot run past the end point in the REDO log in our node.
24998          */
24999         ndbrequire(csrPhasesCompleted != 0);
25000 	logPartPtr.p->logLastGci = fragptr.p->execSrLastGci[i];
25001       }
25002     }
25003 
25004     loopCount++;
25005     if (loopCount > 20) {
25006       jam();
25007       signal->theData[0] = ZSR_GCI_LIMITS;
25008       signal->theData[1] = fragptr.p->nextList;
25009       sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
25010       return;
25011     } else {
25012       jam();
25013       fragptr.i = fragptr.p->nextList;
25014     }//if
25015   }
25016 
25017   if (logPartPtr.p->logStartGci == (UintR)-1) {
25018     jam();
25019       /* --------------------------------------------------------------------
25020        *  THERE WERE NO FRAGMENTS TO INSTALL WE WILL EXECUTE THE LOG AS
25021        *  SHORT AS POSSIBLE TO REACH THE END OF THE LOG. THIS WE DO BY
25022        *  STARTING AT THE STOP GCI.
25023        * ------------------------------------------------------------------- */
25024     logPartPtr.p->logStartGci = logPartPtr.p->logLastGci;
25025   }//if
25026 
25027   for(Uint32 i = 1; i < clogPartFileSize; i++)
25028   {
25029     LogPartRecordPtr tmp;
25030     tmp.i = i;
25031     ptrAss(tmp, logPartRecord);
25032     tmp.p->logStartGci = logPartPtr.p->logStartGci;
25033     tmp.p->logLastGci = logPartPtr.p->logLastGci;
25034   }
25035 
25036   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
25037     jam();
25038     ptrAss(logPartPtr, logPartRecord);
25039     logPartPtr.p->logExecState = LogPartRecord::LES_SEARCH_STOP;
25040     if (DEBUG_REDO)
25041     {
25042       ndbout_c("part: %u srLogLimits (srGciLimits) %u-%u (file: %u mb: %u)",
25043                logPartPtr.p->logPartNo,
25044                logPartPtr.p->logStartGci,
25045                logPartPtr.p->logLastGci,
25046                logPartPtr.p->lastLogfile,
25047                logPartPtr.p->lastMbyte);
25048     }
25049     signal->theData[0] = ZSR_LOG_LIMITS;
25050     signal->theData[1] = logPartPtr.i;
25051     signal->theData[2] = logPartPtr.p->lastLogfile;
25052     signal->theData[3] = logPartPtr.p->lastMbyte;
25053     sendSignal(cownref, GSN_CONTINUEB, signal, 4, JBB);
25054 
25055     logFilePtr.i = logPartPtr.p->lastLogfile;
25056     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
25057     g_eventLogger->info("LDM(%u): Log part %u will execute REDO log"
25058                         " records from GCI %u -> %u and last log file is "
25059                         "number %u and last MByte in this file is %u",
25060                         instance(),
25061                         logPartPtr.p->logPartNo,
25062                         logPartPtr.p->logStartGci,
25063                         logPartPtr.p->logLastGci,
25064                         logFilePtr.p->fileNo, /* fileNo of last log file */
25065                         logPartPtr.p->lastMbyte);
25066   }//for
25067 }//Dblqh::srGciLimits()
25068 
25069 /* --------------------------------------------------------------------------
25070  *       IT IS NOW TIME TO FIND WHERE TO START EXECUTING THE LOG.
25071  *       THIS SIGNAL IS SENT FOR EACH LOG PART AND STARTS THE EXECUTION
25072  *       OF THE LOG FOR THIS PART.
25073  *-------------------------------------------------------------------------- */
srLogLimits(Signal * signal)25074 void Dblqh::srLogLimits(Signal* signal)
25075 {
25076   Uint32 tlastPrepRef = 0;
25077   Uint32 tmbyte;
25078 
25079   jamEntry();
25080   logPartPtr.i = signal->theData[0];
25081   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
25082   logFilePtr.i = signal->theData[1];
25083   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
25084   tmbyte = signal->theData[2];
25085   Uint32 loopCount = 0;
25086   /* ------------------------------------------------------------------------
25087    *   WE ARE SEARCHING FOR THE START AND STOP MBYTE OF THE LOG THAT IS TO BE
25088    *   EXECUTED.
25089    * ----------------------------------------------------------------------- */
25090   while(true) {
25091     ndbrequire(tmbyte < clogFileSize);
25092     if (logPartPtr.p->logExecState == LogPartRecord::LES_SEARCH_STOP)
25093     {
25094       if (logFilePtr.p->logMaxGciCompleted[tmbyte] <= logPartPtr.p->logLastGci)
25095       {
25096         jam();
25097         /* --------------------------------------------------------------------
25098          *  WE ARE STEPPING BACKWARDS FROM MBYTE TO MBYTE. THIS IS THE FIRST
25099          *  MBYTE WHICH IS TO BE INCLUDED IN THE LOG EXECUTION. THE STOP GCI
25100          *  HAS NOT BEEN COMPLETED BEFORE THIS MBYTE. THUS THIS MBYTE HAVE
25101          *  TO BE EXECUTED.
25102          * ------------------------------------------------------------------ */
25103         logPartPtr.p->stopLogfile = logFilePtr.i;
25104         logPartPtr.p->stopMbyte = tmbyte;
25105         logPartPtr.p->logExecState = LogPartRecord::LES_SEARCH_START;
25106         if (DEBUG_REDO)
25107         {
25108           ndbout_c("part: %u srLogLimits found stop pos file: %u mb: %u logMaxGciCompleted[tmbyte]: %u (lastGci: %u)",
25109                    logPartPtr.p->logPartNo,
25110                    logFilePtr.p->fileNo,
25111                    tmbyte,
25112                    logFilePtr.p->logMaxGciCompleted[tmbyte],
25113                    logPartPtr.p->logLastGci);
25114         }
25115       }//if
25116       else if (DEBUG_REDO)
25117       {
25118         ndbout_c("SEARCH STOP SKIP part: %u file: %u mb: %u "
25119                  "logMaxGciCompleted: %u > %u",
25120                  logPartPtr.p->logPartNo,
25121                  logFilePtr.p->fileNo,
25122                  tmbyte,
25123                  logFilePtr.p->logMaxGciCompleted[tmbyte],
25124                  logPartPtr.p->logLastGci);
25125       }
25126     }//if
25127     /* ------------------------------------------------------------------------
25128      *  WHEN WE HAVEN'T FOUND THE STOP MBYTE IT IS NOT NECESSARY TO LOOK FOR THE
25129      *  START MBYTE. THE REASON IS THE FOLLOWING LOGIC CHAIN:
25130      *    MAX_GCI_STARTED >= MAX_GCI_COMPLETED >= LAST_GCI >= START_GCI
25131      *  THUS MAX_GCI_STARTED >= START_GCI. THUS MAX_GCI_STARTED < START_GCI CAN
25132      *  NOT BE TRUE AS WE WILL CHECK OTHERWISE.
25133      * ---------------------------------------------------------------------- */
25134     if (logPartPtr.p->logExecState == LogPartRecord::LES_SEARCH_START)
25135     {
25136       if (logFilePtr.p->logMaxGciStarted[tmbyte] < logPartPtr.p->logStartGci)
25137       {
25138         jam();
25139         /* --------------------------------------------------------------------
25140          *  WE HAVE NOW FOUND THE START OF THE EXECUTION OF THE LOG.
25141          *  WE STILL HAVE TO MOVE IT BACKWARDS TO ALSO INCLUDE THE
25142          *  PREPARE RECORDS WHICH WERE STARTED IN A PREVIOUS MBYTE.
25143          * ------------------------------------------------------------------ */
25144         if (DEBUG_REDO)
25145         {
25146           ndbout_c("part: %u srLogLimits found start pos file: %u mb: %u logMaxGciStarted[tmbyte]: %u (startGci: %u)",
25147                    logPartPtr.p->logPartNo,
25148                    logFilePtr.p->fileNo,
25149                    tmbyte,
25150                    logFilePtr.p->logMaxGciCompleted[tmbyte],
25151                    logPartPtr.p->logStartGci);
25152           ndbout_c("part: %u srLogLimits lastPrepRef => file: %u mb: %u",
25153                    logPartPtr.p->logPartNo,
25154                    logFilePtr.p->logLastPrepRef[tmbyte] >> 16,
25155                    logFilePtr.p->logLastPrepRef[tmbyte] & 65535);
25156         }
25157         tlastPrepRef = logFilePtr.p->logLastPrepRef[tmbyte];
25158         logPartPtr.p->startMbyte = tlastPrepRef & 65535;
25159         LogFileRecordPtr locLogFilePtr;
25160         findLogfile(signal, tlastPrepRef >> 16, logPartPtr, &locLogFilePtr);
25161         logPartPtr.p->startLogfile = locLogFilePtr.i;
25162         logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG;
25163       }
25164       else if (DEBUG_REDO)
25165       {
25166         ndbout_c("SEARCH START SKIP part: %u file: %u mb: %u "
25167                  "logMaxGciCompleted: %u >= %u",
25168                  logPartPtr.p->logPartNo,
25169                  logFilePtr.p->fileNo,
25170                  tmbyte,
25171                  logFilePtr.p->logMaxGciStarted[tmbyte],
25172                  logPartPtr.p->logStartGci);
25173       }
25174     }//if
25175     if (logPartPtr.p->logExecState != LogPartRecord::LES_EXEC_LOG) {
25176       if (tmbyte == 0) {
25177         jam();
25178         tmbyte = clogFileSize - 1;
25179         logFilePtr.i = logFilePtr.p->prevLogFile;
25180         ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
25181       } else {
25182         jam();
25183         tmbyte--;
25184       }//if
25185       if (logPartPtr.p->lastLogfile == logFilePtr.i) {
25186         ndbrequire(logPartPtr.p->lastMbyte != tmbyte);
25187       }//if
25188       if (loopCount > 20) {
25189         jam();
25190         signal->theData[0] = ZSR_LOG_LIMITS;
25191         signal->theData[1] = logPartPtr.i;
25192         signal->theData[2] = logFilePtr.i;
25193         signal->theData[3] = tmbyte;
25194         sendSignal(cownref, GSN_CONTINUEB, signal, 4, JBB);
25195         return;
25196       }//if
25197       loopCount++;
25198     } else {
25199       jam();
25200       break;
25201     }//if
25202   }//while
25203 
25204   if (DEBUG_REDO)
25205   {
25206     LogFileRecordPtr tmp;
25207     tmp.i = logPartPtr.p->stopLogfile;
25208     ptrCheckGuard(tmp, clogFileFileSize, logFileRecord);
25209     ndbout_c("srLogLimits part: %u gci: %u-%u start file: %u mb: %u stop file: %u mb: %u",
25210              logPartPtr.p->logPartNo,
25211              logPartPtr.p->logStartGci,
25212              logPartPtr.p->logLastGci,
25213              tlastPrepRef >> 16,
25214              tlastPrepRef & 65535,
25215              tmp.p->fileNo,
25216              logPartPtr.p->stopMbyte);
25217   }
25218 
25219 
25220 
25221   /* ------------------------------------------------------------------------
25222    *  WE HAVE NOW FOUND BOTH THE START AND THE STOP OF THE LOG. NOW START
25223    *  EXECUTING THE LOG. THE FIRST ACTION IS TO OPEN THE LOG FILE WHERE TO
25224    *  START EXECUTING THE LOG.
25225    * ----------------------------------------------------------------------- */
25226   if (logPartPtr.p->logPartState == LogPartRecord::SR_THIRD_PHASE_STARTED) {
25227     jam();
25228     logFilePtr.i = logPartPtr.p->startLogfile;
25229     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
25230     logFilePtr.p->logFileStatus = LogFileRecord::OPEN_EXEC_SR_START;
25231     openFileRw(signal, logFilePtr);
25232     g_eventLogger->info("LDM(%u): Start executing REDO log for"
25233                         " part %u",
25234                         instance(),
25235                         logPartPtr.p->logPartNo);
25236 
25237     send_runredo_event(signal, logPartPtr.p, logPartPtr.p->logStartGci);
25238   }
25239   else
25240   {
25241     jam();
25242 
25243     g_eventLogger->info("LDM(%u): Found log limits for REDO"
25244                         " post-restart for log part %u",
25245                         instance(),
25246                         logPartPtr.p->logPartNo);
25247 
25248     ndbrequire(logPartPtr.p->logPartState == LogPartRecord::SR_FOURTH_PHASE_STARTED);
25249       /* --------------------------------------------------------------------
25250        *  WE HAVE NOW FOUND THE TAIL MBYTE IN THE TAIL FILE.
25251        *  SET THOSE PARAMETERS IN THE LOG PART.
25252        *  WE HAVE ALSO FOUND THE HEAD MBYTE. WE STILL HAVE TO SEARCH
25253        *  FOR THE PAGE NUMBER AND PAGE INDEX WHERE TO SET THE HEAD.
25254        * ------------------------------------------------------------------- */
25255     logFilePtr.i = logPartPtr.p->startLogfile;
25256     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
25257     logPartPtr.p->logTailFileNo = logFilePtr.p->fileNo;
25258     logPartPtr.p->logTailMbyte = logPartPtr.p->startMbyte;
25259       /* --------------------------------------------------------------------
25260        *  THE HEAD WE ACTUALLY FOUND DURING EXECUTION OF LOG SO WE USE
25261        *  THIS INFO HERE RATHER THAN THE MBYTE WE FOUND TO BE THE HEADER.
25262        * ------------------------------------------------------------------- */
25263     LogFileRecordPtr locLogFilePtr;
25264     findLogfile(signal, logPartPtr.p->headFileNo, logPartPtr, &locLogFilePtr);
25265     locLogFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_FOURTH_PHASE;
25266     openFileRw(signal, locLogFilePtr);
25267   }//if
25268   return;
25269 }//Dblqh::srLogLimits()
25270 
openExecSrStartLab(Signal * signal)25271 void Dblqh::openExecSrStartLab(Signal* signal)
25272 {
25273   logPartPtr.p->currentLogfile = logFilePtr.i;
25274   logFilePtr.p->currentMbyte = logPartPtr.p->startMbyte;
25275   /* ------------------------------------------------------------------------
25276    *     WE NEED A TC CONNECT RECORD TO HANDLE EXECUTION OF LOG RECORDS.
25277    *     This will always succeed since we don't interact with user
25278    *     operations during recovery when we are applying the REDO log content.
25279    * ------------------------------------------------------------------------ */
25280   TcConnectionrecPtr tcConnectptr;
25281   seizeTcrec(tcConnectptr);
25282   ndbrequire(Magic::check_ptr(tcConnectptr.p));
25283   logPartPtr.p->logTcConrec = tcConnectptr.i;
25284   /* ------------------------------------------------------------------------
25285    *   THE FIRST LOG RECORD TO EXECUTE IS ALWAYS AT A NEW MBYTE.
25286    *   SET THE NUMBER OF PAGES IN THE MAIN MEMORY BUFFER TO ZERO AS AN INITIAL
25287    *   VALUE. THIS VALUE WILL BE UPDATED AND ENSURED THAT IT RELEASES PAGES IN
25288    *   THE SUBROUTINE READ_EXEC_SR.
25289    * ----------------------------------------------------------------------- */
25290   logPartPtr.p->mmBufferSize = 0;
25291   readExecSrNewMbyte(signal);
25292   return;
25293 }//Dblqh::openExecSrStartLab()
25294 
25295 /* ---------------------------------------------------------------------------
25296  *  WE WILL ALWAYS ENSURE THAT WE HAVE AT LEAST 16 KBYTE OF LOG PAGES WHEN WE
25297  *  START READING A LOG RECORD. THE ONLY EXCEPTION IS WHEN WE COME CLOSE TO A
25298  *  MBYTE BOUNDARY. SINCE WE KNOW THAT LOG RECORDS ARE NEVER WRITTEN ACROSS A
25299  *  MBYTE BOUNDARY THIS IS NOT A PROBLEM.
25300  *
25301  *  WE START BY READING 64 KBYTE BEFORE STARTING TO EXECUTE THE LOG RECORDS.
25302  *  WHEN WE COME BELOW 64 KBYTE WE READ ANOTHER SET OF LOG PAGES. WHEN WE
25303  *  GO BELOW 16 KBYTE WE WAIT UNTIL THE READ PAGES HAVE ENTERED THE BLOCK.
25304  * ------------------------------------------------------------------------- */
/* --------------------------------------------------------------------------
 *       NEW PAGES FROM THE LOG FILE HAVE ARRIVED DURING EXECUTION OF THE LOG.
 * ------------------------------------------------------------------------- */
readExecSrLab(Signal * signal)25308 void Dblqh::readExecSrLab(Signal* signal)
25309 {
25310   buildLinkedLogPageList(signal);
25311   /* ------------------------------------------------------------------------
25312    *   WE NEED TO SET THE CURRENT PAGE INDEX OF THE FIRST PAGE SINCE IT CAN BE
25313    *   USED IMMEDIATELY WITHOUT ANY OTHER INITIALISATION. THE REST OF THE PAGES
25314    *   WILL BE INITIALISED BY READ_LOGWORD.
25315    * ----------------------------------------------------------------------- */
25316   logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = ZPAGE_HEADER_SIZE;
25317   if (logPartPtr.p->logExecState ==
25318       LogPartRecord::LES_WAIT_READ_EXEC_SR_NEW_MBYTE) {
25319     jam();
25320     /* ----------------------------------------------------------------------
25321      *  THIS IS THE FIRST READ DURING THE EXECUTION OF THIS MBYTE. SET THE
25322      *  NEW CURRENT LOG PAGE TO THE FIRST OF THESE PAGES. CHANGE
25323      *  LOG_EXEC_STATE TO ENSURE THAT WE START EXECUTION OF THE LOG.
25324      * --------------------------------------------------------------------- */
25325     logFilePtr.p->currentFilepage = logFilePtr.p->currentMbyte *
25326                                     ZPAGES_IN_MBYTE;
25327     logPartPtr.p->prevFilepage = logFilePtr.p->currentFilepage;
25328     logFilePtr.p->currentLogpage = lfoPtr.p->firstLfoPage;
25329     logPartPtr.p->prevLogpage = logFilePtr.p->currentLogpage;
25330   }//if
25331   moveToPageRef(signal);
25332   releaseLfo(signal);
25333   /* ------------------------------------------------------------------------
25334    *  NOW WE HAVE COMPLETED THE RECEPTION OF THESE PAGES.
25335    *  NOW CHECK IF WE NEED TO READ MORE PAGES.
25336    * ----------------------------------------------------------------------- */
25337   checkReadExecSr(signal);
25338   if (logPartPtr.p->logExecState == LogPartRecord::LES_EXEC_LOG) {
25339     jam();
25340     signal->theData[0] = ZEXEC_SR;
25341     signal->theData[1] = logPartPtr.i;
25342     sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
25343     return;
25344   }//if
25345   return;
25346 }//Dblqh::readExecSrLab()
25347 
openExecSrNewMbyteLab(Signal * signal)25348 void Dblqh::openExecSrNewMbyteLab(Signal* signal)
25349 {
25350   readExecSrNewMbyte(signal);
25351   return;
25352 }//Dblqh::openExecSrNewMbyteLab()
25353 
closeExecSrLab(Signal * signal)25354 void Dblqh::closeExecSrLab(Signal* signal)
25355 {
25356   LogFileRecordPtr locLogFilePtr;
25357   logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
25358   logPartPtr.i = logFilePtr.p->logPartRec;
25359   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
25360   locLogFilePtr.i = logPartPtr.p->currentLogfile;
25361   ptrCheckGuard(locLogFilePtr, clogFileFileSize, logFileRecord);
25362   locLogFilePtr.p->logFileStatus = LogFileRecord::OPEN_EXEC_SR_NEW_MBYTE;
25363   openFileRw(signal, locLogFilePtr);
25364   return;
25365 }//Dblqh::closeExecSrLab()
25366 
writeDirtyLab(Signal * signal)25367 void Dblqh::writeDirtyLab(Signal* signal)
25368 {
25369   releaseLfo(signal);
25370   signal->theData[0] = logPartPtr.i;
25371   execSr(signal);
25372   return;
25373 }//Dblqh::writeDirtyLab()
25374 
25375 /* --------------------------------------------------------------------------
25376  *       EXECUTE A LOG RECORD WITHIN THE CURRENT MBYTE.
25377  * ------------------------------------------------------------------------- */
execSr(Signal * signal)25378 void Dblqh::execSr(Signal* signal)
25379 {
25380   LogFileRecordPtr nextLogFilePtr;
25381   LogPageRecordPtr tmpLogPagePtr;
25382   Uint32 logWord;
25383   Uint32 line;
25384   const char * crash_msg = 0;
25385 
25386   jamEntry();
25387   logPartPtr.i = signal->theData[0];
25388   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
25389 
25390   do {
25391     jam();
25392     logFilePtr.i = logPartPtr.p->currentLogfile;
25393     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
25394     logPagePtr.i = logPartPtr.p->prevLogpage;
25395     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
25396     if (logPagePtr.p->logPageWord[ZPOS_DIRTY] == ZDIRTY) {
25397       jam();
25398       switch (logPartPtr.p->logExecState) {
25399       case LogPartRecord::LES_EXEC_LOG_COMPLETED:
25400       case LogPartRecord::LES_EXEC_LOG_NEW_FILE:
25401       case LogPartRecord::LES_EXEC_LOG_NEW_MBYTE:
25402         jam();
25403 	/* ------------------------------------------------------------------
25404 	 *  IN THIS WE HAVE COMPLETED EXECUTION OF THE CURRENT LOG PAGE
25405 	 *  AND CAN WRITE IT TO DISK SINCE IT IS DIRTY.
25406 	 * ----------------------------------------------------------------- */
25407         writeDirty(signal, __LINE__);
25408         return;
25409         break;
25410       case LogPartRecord::LES_EXEC_LOG:
25411       jam();
25412       /* --------------------------------------------------------------------
25413        *  IN THIS CASE WE ONLY WRITE THE PAGE TO DISK IF WE HAVE COMPLETED
25414        *  EXECUTION OF LOG RECORDS BELONGING TO THIS LOG PAGE.
25415        * ------------------------------------------------------------------- */
25416         if (logFilePtr.p->currentLogpage != logPartPtr.p->prevLogpage) {
25417           jam();
25418           writeDirty(signal, __LINE__);
25419           return;
25420         }//if
25421         break;
25422       default:
25423         ndbabort();
25424       }//switch
25425     }//if
25426     if (logFilePtr.p->currentLogpage != logPartPtr.p->prevLogpage) {
25427       jam();
25428       logPartPtr.p->prevLogpage = logPagePtr.p->logPageWord[ZNEXT_PAGE];
25429       logPartPtr.p->prevFilepage++;
25430       continue;
25431     }//if
25432     switch (logPartPtr.p->logExecState) {
25433     case LogPartRecord::LES_EXEC_LOG_COMPLETED:
25434       jam();
25435       releaseMmPages(signal);
25436       logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_EXEC_SR_COMPLETED;
25437       closeFile(signal, logFilePtr, __LINE__);
25438       return;
25439       break;
25440     case LogPartRecord::LES_EXEC_LOG_NEW_MBYTE:
25441       jam();
25442       logFilePtr.p->currentMbyte++;
25443       readExecSrNewMbyte(signal);
25444       return;
25445       break;
25446     case LogPartRecord::LES_EXEC_LOG_NEW_FILE:
25447       jam();
25448       nextLogFilePtr.i = logFilePtr.p->nextLogFile;
25449       logPartPtr.p->currentLogfile = nextLogFilePtr.i;
25450       ptrCheckGuard(nextLogFilePtr, clogFileFileSize, logFileRecord);
25451       nextLogFilePtr.p->currentMbyte = 0;
25452       logFilePtr.p->logFileStatus = LogFileRecord::CLOSING_EXEC_SR;
25453       closeFile(signal, logFilePtr, __LINE__);
25454       return;
25455       break;
25456     case LogPartRecord::LES_EXEC_LOG:
25457       jam();
25458       /*empty*/;
25459       break;
25460     default:
25461       jam();
25462       systemErrorLab(signal, __LINE__);
25463       return;
25464       break;
25465     }//switch
25466     logPagePtr.i = logFilePtr.p->currentLogpage;
25467     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
25468     logPartPtr.p->savePageIndex = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
25469     if (logPartPtr.p->execSrPagesRead < ZMIN_READ_BUFFER_SIZE) {
25470       /* --------------------------------------------------------------------
25471        *  THERE WERE LESS THAN 16 KBYTE OF LOG PAGES REMAINING. WE WAIT UNTIL
25472        *  THE NEXT 64 KBYTE ARRIVES UNTIL WE CONTINUE AGAIN.
25473        * ------------------------------------------------------------------- */
25474       if ((logPartPtr.p->execSrPagesRead +
25475 	   logPartPtr.p->execSrPagesExecuted) < ZPAGES_IN_MBYTE) {
25476         jam();
25477 	/* ------------------------------------------------------------------
25478 	 *  WE ONLY STOP AND WAIT IF THERE MORE PAGES TO READ. IF IT IS NOT
25479 	 *  THEN IT IS THE END OF THE MBYTE AND WE WILL CONTINUE. IT IS NO
25480 	 *  RISK THAT A LOG RECORD WE FIND WILL NOT BE READ AT THIS TIME
25481 	 *  SINCE THE LOG RECORDS NEVER SPAN OVER A MBYTE BOUNDARY.
25482 	 * ----------------------------------------------------------------- */
25483         readExecSr(signal);
25484         logPartPtr.p->logExecState = LogPartRecord::LES_WAIT_READ_EXEC_SR;
25485         return;
25486       }//if
25487     }//if
25488     logWord = readLogword(signal);
25489     switch (logWord) {
25490 /* ========================================================================= */
25491 /* ========================================================================= */
25492     case ZPREP_OP_TYPE:
25493     {
25494       logWord = readLogword(signal);
25495       stepAhead(signal, logWord - 2);
25496       logPartPtr.p->m_redoWorkStats.m_opsPrepared++;
25497       break;
25498     }
25499 /* ========================================================================= */
25500 /* ========================================================================= */
25501     case ZINVALID_COMMIT_TYPE:
25502       jam();
25503       stepAhead(signal, ZCOMMIT_LOG_SIZE - 1);
25504       break;
25505 /* ========================================================================= */
25506 /* ========================================================================= */
25507     case ZCOMMIT_TYPE:
25508     {
25509       CommitLogRecord commitLogRecord;
25510       jam();
25511       TcConnectionrecPtr tcConnectptr;
25512       tcConnectptr.i = logPartPtr.p->logTcConrec;
25513       ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
25514       readCommitLog(signal, &commitLogRecord, tcConnectptr);
25515       if (tcConnectptr.p->gci_hi > crestartNewestGci) {
25516         jam();
25517 /*---------------------------------------------------------------------------*/
25518 /* THIS LOG RECORD MUST BE IGNORED. IT IS PART OF A GLOBAL CHECKPOINT WHICH  */
25519 /* WILL BE INVALIDATED BY THE SYSTEM RESTART. IF NOT INVALIDATED IT MIGHT BE */
25520 /* EXECUTED IN A FUTURE SYSTEM RESTART.                                      */
25521 /*---------------------------------------------------------------------------*/
25522         tmpLogPagePtr.i = logPartPtr.p->prevLogpage;
25523         ptrCheckGuard(tmpLogPagePtr, clogPageFileSize, logPageRecord);
25524         arrGuard(logPartPtr.p->savePageIndex, ZPAGE_SIZE);
25525         tmpLogPagePtr.p->logPageWord[logPartPtr.p->savePageIndex] =
25526                                                   ZINVALID_COMMIT_TYPE;
25527         tmpLogPagePtr.p->logPageWord[ZPOS_DIRTY] = ZDIRTY;
25528       }
25529       else
25530       {
25531         jam();
25532 /*---------------------------------------------------------------------------*/
25533 /* CHECK IF I AM SUPPOSED TO EXECUTE THIS LOG RECORD. IF I AM THEN SAVE PAGE */
25534 /* INDEX IN CURRENT LOG PAGE SINCE IT WILL BE OVERWRITTEN WHEN EXECUTING THE */
25535 /* LOG RECORD.                                                               */
25536 /*---------------------------------------------------------------------------*/
25537         logPartPtr.p->execSrExecuteIndex = 0;
25538         Uint32 result = checkIfExecLog(signal, tcConnectptr);
25539         if (result == ZOK) {
25540           jam();
25541 //*---------------------------------------------------------------------------*/
25542 /* IN A NODE RESTART WE WILL NEVER END UP HERE SINCE NO FRAGMENTS HAVE BEEN  */
25543 /* DEFINED YET. THUS NO EXTRA CHECKING FOR NODE RESTART IS NECESSARY.        */
25544 /*---------------------------------------------------------------------------*/
25545           logPartPtr.p->savePageIndex =
25546              logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
25547           tcConnectptr.p->fragmentptr = fragptr.i;
25548           findPageRef(signal, &commitLogRecord);
25549           logPartPtr.p->execSrLogPageIndex = commitLogRecord.startPageIndex;
25550           if (logPagePtr.i != RNIL) {
25551             jam();
25552             logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = commitLogRecord.startPageIndex;
25553             logPartPtr.p->execSrLogPage = logPagePtr.i;
25554             execLogRecord(signal);
25555             return;
25556           }//if
25557           logPartPtr.p->execSrStartPageNo = commitLogRecord.startPageNo;
25558           logPartPtr.p->execSrStopPageNo = commitLogRecord.stopPageNo;
25559           findLogfile(signal, commitLogRecord.fileNo, logPartPtr, &logFilePtr);
25560           logPartPtr.p->execSrExecLogFile = logFilePtr.i;
25561           if (logFilePtr.i == logPartPtr.p->currentLogfile) {
25562             jam();
25563 #ifndef NO_REDO_PAGE_CACHE
25564             Uint32 cnt = 1 +
25565               logPartPtr.p->execSrStopPageNo - logPartPtr.p->execSrStartPageNo;
25566             evict(m_redo_page_cache, cnt);
25567 #endif
25568             readExecLog(signal);
25569             lfoPtr.p->lfoState = LogFileOperationRecord::READ_EXEC_LOG;
25570             return;
25571           } else {
25572             jam();
25573 /*---------------------------------------------------------------------------*/
25574 /* THE FILE IS CURRENTLY NOT OPEN. WE MUST OPEN IT BEFORE WE CAN READ FROM   */
25575 /* THE FILE.                                                                 */
25576 /*---------------------------------------------------------------------------*/
25577 #ifndef NO_REDO_OPEN_FILE_CACHE
25578             openFileRw_cache(signal, logFilePtr);
25579 #else
25580             logFilePtr.p->logFileStatus = LogFileRecord::OPEN_EXEC_LOG;
25581             openFileRw(signal, logFilePtr);
25582 #endif
25583             return;
25584           }//if
25585         }//if
25586         else
25587         {
25588           logPartPtr.p->m_redoWorkStats.m_opsSkipped++;
25589         }
25590       }//if
25591       break;
25592     }
25593 /* ========================================================================= */
25594 /* ========================================================================= */
25595     case ZABORT_TYPE:
25596       jam();
25597       stepAhead(signal, ZABORT_LOG_SIZE - 1);
25598       logPartPtr.p->m_redoWorkStats.m_opsSkipped++;
25599       break;
25600 /* ========================================================================= */
25601 /* ========================================================================= */
25602     case ZFD_TYPE:
25603       jam();
25604 /*---------------------------------------------------------------------------*/
25605 /* THIS IS THE FIRST ITEM WE ENCOUNTER IN A NEW FILE. AT THIS MOMENT WE SHALL*/
25606 /* SIMPLY BYPASS IT. IT HAS NO SIGNIFANCE WHEN EXECUTING THE LOG. IT HAS ITS */
25607 /* SIGNIFANCE WHEN FINDING THE START END THE END OF THE LOG.                 */
25608 /* WE HARDCODE THE PAGE INDEX SINCE THIS SHOULD NEVER BE FOUND AT ANY OTHER  */
25609 /* PLACE THAN IN THE FIRST PAGE OF A NEW FILE IN THE FIRST POSITION AFTER THE*/
25610 /* HEADER.                                                                   */
25611 /*---------------------------------------------------------------------------*/
25612       if (unlikely(logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] !=
25613 		   (ZPAGE_HEADER_SIZE + ZPOS_NO_FD)))
25614       {
25615 	line = __LINE__;
25616 	logWord = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
25617 	crash_msg = "ZFD_TYPE at incorrect position!";
25618 	goto crash;
25619       }
25620       {
25621         Uint32 noFdDescriptors =
25622 	  logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_NO_FD];
25623           logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] =
25624 	      (ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE) +
25625 	      (noFdDescriptors * ZFD_MBYTE_SIZE * clogFileSize);
25626       }
25627       break;
25628 /* ========================================================================= */
25629 /* ========================================================================= */
25630     case ZNEXT_LOG_RECORD_TYPE:
25631       jam();
25632       stepAhead(signal, ZPAGE_SIZE - logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]);
25633       break;
25634 /* ========================================================================= */
25635 /* ========================================================================= */
25636     case ZNEXT_MBYTE_TYPE:
25637 /*---------------------------------------------------------------------------*/
25638 /* WE WILL SKIP A PART OF THE LOG FILE. ACTUALLY THE NEXT POINTER IS TO      */
25639 /* A NEW MBYTE. THEREFORE WE WILL START UP A NEW MBYTE. THIS NEW MBYTE IS    */
25640 /* HOWEVER ONLY STARTED IF IT IS NOT AFTER THE STOP MBYTE.                   */
25641 /* IF WE HAVE REACHED THE END OF THE STOP MBYTE THEN THE EXECUTION OF THE LOG*/
25642 /* IS COMPLETED.                                                             */
25643 /*---------------------------------------------------------------------------*/
25644       if (logPartPtr.p->currentLogfile == logPartPtr.p->stopLogfile) {
25645         if (logFilePtr.p->currentMbyte == logPartPtr.p->stopMbyte) {
25646           jam();
25647 /*---------------------------------------------------------------------------*/
25648 /* THIS WAS THE LAST MBYTE TO EXECUTE IN THIS LOG PART. WE SHOULD HAVE FOUND */
25649 /* A COMPLETED GCI RECORD OF THE LAST GCI BEFORE THIS. FOR SOME REASON THIS  */
25650 /* RECORD WAS NOT AVAILABLE ON THE LOG. CRASH THE SYSTEM, A VERY SERIOUS     */
25651 /* ERROR WHICH WE MUST REALLY WORK HARD TO AVOID.                            */
25652 /*---------------------------------------------------------------------------*/
25653 /*---------------------------------------------------------------------------*/
25654 /* SEND A SIGNAL TO THE SIGNAL LOG AND THEN CRASH THE SYSTEM.                */
25655 /*---------------------------------------------------------------------------*/
25656 	  line = __LINE__;
25657 	  logWord = ZNEXT_MBYTE_TYPE;
25658 	  crash_msg = "end of log wo/ having found last GCI";
25659 	  goto crash;
25660         }//if
25661       }//if
25662 /*---------------------------------------------------------------------------*/
25663 /* START EXECUTION OF A NEW MBYTE IN THE LOG.                                */
25664 /*---------------------------------------------------------------------------*/
25665       if (logFilePtr.p->currentMbyte < (clogFileSize - 1)) {
25666         jam();
25667         logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG_NEW_MBYTE;
25668       } else {
25669         ndbrequire(logFilePtr.p->currentMbyte == (clogFileSize - 1));
25670         jam();
25671 /*---------------------------------------------------------------------------*/
25672 /* WE HAVE TO CHANGE FILE. CLOSE THIS ONE AND THEN OPEN THE NEXT.            */
25673 /*---------------------------------------------------------------------------*/
25674         logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG_NEW_FILE;
25675       }//if
25676       break;
25677 /* ========================================================================= */
25678 /* ========================================================================= */
25679     case ZCOMPLETED_GCI_TYPE:
25680       jam();
25681       logWord = readLogword(signal);
25682       if (DEBUG_REDO)
25683       {
25684         ndbout_c("found gci: %u part: %u file: %u page: %u (mb: %u)",
25685                  logWord,
25686                  logPartPtr.p->logPartNo,
25687                  logFilePtr.p->fileNo,
25688                  logFilePtr.p->currentFilepage,
25689                  logFilePtr.p->currentFilepage >> ZTWOLOG_NO_PAGES_IN_MBYTE);
25690       }
25691       logPartPtr.p->m_redoWorkStats.m_gcisExecuted++;
25692       if (logWord == logPartPtr.p->logLastGci)
25693       {
25694         jam();
25695 /*---------------------------------------------------------------------------*/
25696 /* IF IT IS THE LAST GCI TO LIVE AFTER SYSTEM RESTART THEN WE RECORD THE NEXT*/
25697 /* WORD AS THE NEW HEADER OF THE LOG FILE. OTHERWISE WE SIMPLY IGNORE THIS   */
25698 /* LOG RECORD.                                                               */
25699 /*---------------------------------------------------------------------------*/
25700         if (csrPhasesCompleted == 0) {
25701           jam();
25702 /*---------------------------------------------------------------------------*/
25703 /*WE ONLY RECORD THE HEAD OF THE LOG IN THE FIRST LOG ROUND OF LOG EXECUTION.*/
25704 /*---------------------------------------------------------------------------*/
25705           logPartPtr.p->headFileNo = logFilePtr.p->fileNo;
25706           logPartPtr.p->headPageNo = logFilePtr.p->currentFilepage;
25707           logPartPtr.p->headPageIndex =
25708                   logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
25709 	  logPartPtr.p->logLap = logPagePtr.p->logPageWord[ZPOS_LOG_LAP];
25710           if (DEBUG_REDO)
25711           {
25712             ndbout_c("execSr part: %u logLap: %u",
25713                      logPartPtr.p->logPartNo, logPartPtr.p->logLap);
25714           }
25715         }//if
25716 /*---------------------------------------------------------------------------*/
25717 /* THERE IS NO NEED OF EXECUTING PAST THIS LINE SINCE THERE WILL ONLY BE LOG */
25718 /* RECORDS THAT WILL BE OF NO INTEREST. THUS CLOSE THE FILE AND START THE    */
25719 /* NEXT PHASE OF THE SYSTEM RESTART.                                         */
25720 /*---------------------------------------------------------------------------*/
25721         logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG_COMPLETED;
25722         send_runredo_event(signal, logPartPtr.p, logPartPtr.p->logLastGci);
25723       }//if
25724       break;
25725     default:
25726       jam();
25727 /* ========================================================================= */
25728 /* ========================================================================= */
25729 /*---------------------------------------------------------------------------*/
25730 /* SEND A SIGNAL TO THE SIGNAL LOG AND THEN CRASH THE SYSTEM.                */
25731 /*---------------------------------------------------------------------------*/
25732       line = __LINE__;
25733       crash_msg = "Invalid logword";
25734       goto crash;
25735       break;
25736     }//switch
25737 /*---------------------------------------------------------------------------*/
25738 // We continue to execute log records until we find a proper one to execute or
25739 // that we reach a new page.
25740 /*---------------------------------------------------------------------------*/
25741   } while (1);
25742   return;
25743 
25744 crash:
25745   signal->theData[0] = RNIL;
25746   signal->theData[1] = logPartPtr.i;
25747   Uint32 tmp = logFilePtr.p->fileName[3];
25748   tmp = (tmp >> 8) & 0xff;// To get the Directory, DXX.
25749   signal->theData[2] = tmp;
25750   signal->theData[3] = logFilePtr.p->fileNo;
25751   signal->theData[4] = logFilePtr.p->currentMbyte;
25752   signal->theData[5] = logFilePtr.p->currentFilepage;
25753   signal->theData[6] = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
25754   signal->theData[7] = logWord;
25755   signal->theData[8] = line;
25756 
25757   char buf[255];
25758   BaseString::snprintf(buf, sizeof(buf),
25759 		       "Error while reading REDO log. from %d\n"
25760 		       "part: %u D=%d, F=%d Mb=%d FP=%d W1=%d W2=%d : %s gci: %u",
25761 		       signal->theData[8],
25762                        logPartPtr.p->logPartNo,
25763 		       signal->theData[2],
25764 		       signal->theData[3],
25765 		       signal->theData[4],
25766 		       signal->theData[5],
25767 		       signal->theData[6],
25768 		       signal->theData[7],
25769 		       crash_msg ? crash_msg : "",
25770 		       logPartPtr.p->logLastGci);
25771 
25772   ndbout_c("%s", buf);
25773   ndbout_c("logPartPtr.p->logExecState: %u", logPartPtr.p->logExecState);
25774   ndbout_c("crestartOldestGci: %u", crestartOldestGci);
25775   ndbout_c("crestartNewestGci: %u", crestartNewestGci);
25776   ndbout_c("csrPhasesCompleted: %u", csrPhasesCompleted);
25777   ndbout_c("logPartPtr.p->logStartGci: %u", logPartPtr.p->logStartGci);
25778   ndbout_c("logPartPtr.p->logLastGci: %u", logPartPtr.p->logLastGci);
25779 
25780   progError(__LINE__, NDBD_EXIT_SR_REDOLOG, buf);
25781 }//Dblqh::execSr()
25782 
25783 /*---------------------------------------------------------------------------*/
25784 /* THIS SIGNAL IS ONLY RECEIVED TO BE CAPTURED IN THE SIGNAL LOG. IT IS      */
25785 /* ALSO USED TO CRASH THE SYSTEM AFTER SENDING A SIGNAL TO THE LOG.          */
25786 /*---------------------------------------------------------------------------*/
execDEBUG_SIG(Signal * signal)25787 void Dblqh::execDEBUG_SIG(Signal* signal)
25788 {
25789 /*
25790 2.5 TEMPORARY VARIABLES
25791 -----------------------
25792 */
25793   jamEntry();
25794   //logPagePtr.i = signal->theData[0];
25795   //tdebug = logPagePtr.p->logPageWord[0];
25796 
25797   char buf[100];
25798   BaseString::snprintf(buf, 100,
25799 	   "Error while reading REDO log. from %d\n"
25800 	   "D=%d, F=%d Mb=%d FP=%d W1=%d W2=%d",
25801 	   signal->theData[8],
25802 	   signal->theData[2], signal->theData[3], signal->theData[4],
25803 	   signal->theData[5], signal->theData[6], signal->theData[7]);
25804 
25805   progError(__LINE__, NDBD_EXIT_SR_REDOLOG, buf);
25806 
25807   return;
25808 }//Dblqh::execDEBUG_SIG()
25809 
25810 /*---------------------------------------------------------------------------*/
25811 /*---------------------------------------------------------------------------*/
closeExecLogLab(Signal * signal)25812 void Dblqh::closeExecLogLab(Signal* signal)
25813 {
25814   logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
25815   signal->theData[0] = ZEXEC_SR;
25816   signal->theData[1] = logFilePtr.p->logPartRec;
25817   sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
25818   return;
25819 }//Dblqh::closeExecLogLab()
25820 
openExecLogLab(Signal * signal)25821 void Dblqh::openExecLogLab(Signal* signal)
25822 {
25823 #ifndef NO_REDO_PAGE_CACHE
25824   Uint32 cnt = 1 +
25825     logPartPtr.p->execSrStopPageNo - logPartPtr.p->execSrStartPageNo;
25826 
25827 #if 0
25828   Uint32 MAX_EXTRA_READ = 9; // can be max 9 due to FSREADREQ formatting
25829   while (cnt < maxextraread && (logPartPtr.p->execSrStopPageNo % 32) != 31)
25830   {
25831     jam();
25832     cnt++;
25833     logPartPtr.p->execSrStopPageNo++;
25834   }
25835 #endif
25836 
25837   evict(m_redo_page_cache, cnt);
25838 #endif
25839 
25840   readExecLog(signal);
25841   lfoPtr.p->lfoState = LogFileOperationRecord::READ_EXEC_LOG;
25842   return;
25843 }//Dblqh::openExecLogLab()
25844 
readExecLogLab(Signal * signal)25845 void Dblqh::readExecLogLab(Signal* signal)
25846 {
25847   buildLinkedLogPageList(signal);
25848 #ifndef NO_REDO_PAGE_CACHE
25849   addCachePages(m_redo_page_cache,
25850                 logPartPtr.p->logPartNo,
25851                 logPartPtr.p->execSrStartPageNo,
25852                 lfoPtr.p);
25853 #endif
25854   logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOGREC_FROM_FILE;
25855   logPartPtr.p->execSrLfoRec = lfoPtr.i;
25856   logPartPtr.p->execSrLogPage = logPagePtr.i;
25857   logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] =
25858     logPartPtr.p->execSrLogPageIndex;
25859   execLogRecord(signal);
25860   return;
25861 }//Dblqh::readExecLogLab()
25862 
25863 /*---------------------------------------------------------------------------*/
/* THIS CODE IS USED TO EXECUTE A LOG RECORD WHEN ITS DATA HAS BEEN LOCATED  */
/* AND TRANSFERRED INTO MEMORY.                                              */
25866 /*---------------------------------------------------------------------------*/
execLogRecord(Signal * signal)25867 void Dblqh::execLogRecord(Signal* signal)
25868 {
25869   jamEntry();
25870 
25871   TcConnectionrecPtr tcConnectptr;
25872   tcConnectptr.i = logPartPtr.p->logTcConrec;
25873   ndbrequire(tcConnect_pool.getUncheckedPtrRW(tcConnectptr));
25874   fragptr.i = tcConnectptr.p->fragmentptr;
25875   ndbrequire(Magic::check_ptr(tcConnectptr.p));
25876   c_fragment_pool.getPtr(fragptr);
25877   tcConnectptr.p->m_log_part_ptr_i = fragptr.p->m_log_part_ptr_i;
25878 
25879   // Read a log record and prepare it for execution
25880   readLogHeader(signal, tcConnectptr);
25881   readKey(signal, tcConnectptr);
25882   readAttrinfo(signal, tcConnectptr);
25883   initReqinfoExecSr(signal, tcConnectptr);
25884   arrGuard(logPartPtr.p->execSrExecuteIndex, MAX_REPLICAS);
25885   BlockReference ref = fragptr.p->execSrBlockref[logPartPtr.p->execSrExecuteIndex];
25886   tcConnectptr.p->nextReplica = refToNode(ref);
25887   tcConnectptr.p->connectState = TcConnectionrec::LOG_CONNECTED;
25888   tcConnectptr.p->tcOprec = tcConnectptr.i;
25889   tcConnectptr.p->tcHashKeyHi = 0;
25890   DEB_REDO(("Execute REDO log on tab(%u,%u)",
25891            fragptr.p->tabRef, fragptr.p->fragId));
25892   packLqhkeyreqLab(signal, tcConnectptr);
25893 
25894   logPartPtr.p->m_redoWorkStats.m_opsExecuted++;
25895   logPartPtr.p->m_redoWorkStats.m_bytesExecuted+=
25896     (tcConnectptr.p->primKeyLen + tcConnectptr.p->totSendlenAi) << 2;
25897   return;
25898 }//Dblqh::execLogRecord()
25899 
25900 //----------------------------------------------------------------------------
// invalidateLogAfterLastGCI() invalidates log pages after the last GCI
// record in a system/node restart. This is to ensure that the end of the
// log is consistent. It is executed last in start phase 3.
// RT 450. EDTJAMO.
25905 //----------------------------------------------------------------------------
25906 Uint32
nextLogFilePtr(Uint32 logFilePtrI)25907 Dblqh::nextLogFilePtr(Uint32 logFilePtrI)
25908 {
25909   LogFileRecordPtr tmp;
25910   tmp.i = logFilePtrI;
25911   ptrCheckGuard(tmp, clogFileFileSize, logFileRecord);
25912   return tmp.p->nextLogFile;
25913 }
25914 
/**
 * Drive the invalidation of REDO log pages located after the head of the
 * log.  The function is re-entered after each file operation and selects
 * its action from lfoPtr.p->lfoState:
 *
 *  - READ_SR_INVALIDATE_SEARCH_FILES: page 1 of a file has been read;
 *    decide whether to keep searching forward file by file
 *    (readFileInInvalidate step 3) or to go back one file and search page
 *    by page (step 6).
 *  - READ_SR_INVALIDATE_PAGES: a page has been read during the linear
 *    search; either read the next page or, when the end of the search is
 *    reached, fall through and start writing.
 *  - WRITE_SR_INVALIDATE_PAGES: step one page backwards and write the next
 *    invalidation page through writeFileInInvalidate().
 *
 * Requires logExecState == LES_EXEC_LOG_INVALIDATE and that logFilePtr
 * refers to the file numbered invalidateFileNo; otherwise systemError() is
 * called.  Any other lfoState aborts.
 */
void
Dblqh::invalidateLogAfterLastGCI(Signal* signal)
{
  jam();
  if (logPartPtr.p->logExecState != LogPartRecord::LES_EXEC_LOG_INVALIDATE) {
    jam();
    systemError(signal, __LINE__);
  }

  if (logFilePtr.p->fileNo != logPartPtr.p->invalidateFileNo) {
    jam();
    systemError(signal, __LINE__);
  }

  switch (lfoPtr.p->lfoState) {
  case LogFileOperationRecord::READ_SR_INVALIDATE_SEARCH_FILES:
  {
    jam();
    // Check if this file contains pages needing to be invalidated.
    // Only page 1 of each file is inspected in this search phase.
    ndbrequire(logPartPtr.p->invalidatePageNo == 1);
    // Evaluate the page before it is released below.
    bool ok = logPagePtr.p->logPageWord[ZPOS_LOG_LAP] == logPartPtr.p->logLap;
    releaseLfo(signal);
    releaseLogpage(signal);
    if (ok)
    {
      jam();
      // This page carries the current log lap, so it must be invalidated.
      // Continue the search in the next file.
      readFileInInvalidate(signal, 3);
      return;
    }
    else
    {
      jam();
      /**
       * This file does not need to be invalidated...move to the previous
       *   file and search forward linearly
       */
      readFileInInvalidate(signal, 6);
      return;
    }
    break;
  }
  case LogFileOperationRecord::READ_SR_INVALIDATE_PAGES:
    jam();
    /**
     * Check if this page must be invalidated.
     *
     * If the log lap number on a page after the head of the log is the same
     * as the actual log lap number we must invalidate this page. Otherwise it
     * could be impossible to find the end of the log in a later system/node
     * restart.
     *
     * After a restart, the log lap is used to find the old head (last written
     * part) of the redo log. In some cases recovery may complete by applying
     * up to some point long before the old head of the redo log, creating a
     * new head at an earlier position.  In this case, it is important to
     * invalidate the 'trimmed' part up to the old head, so that a future
     * recovery does not accidentally include it based on the log lap.  This
     * invalidation must itself be repeatable in case it fails part-way through.
     *
     * In addition we have the following conditions that give us an end to the
     * search. Were it not for these conditions, the search could stop at the
     * first log page found with the wrong log lap.
     *
     * 1) Redo log writing can skip over pages to the next MByte start, this
     * means that pages can contain an old log lap although they are in the
     * part of REDO log which should be invalidated unless we search forward
     * at least until the next MByte start. We can have at most 1 MByte minus
     * one page of such pages with old log lap. When we skip major chunks like
     * this we set the synch flag to ensure that we can have an upper bound of
     * how far ahead in the REDO log we need to search until we have found the
     * end of it.
     *
     * 2) We can at most have 1 MByte of log writes outstanding.
     *    This is ensured by NDBFS through the use of the auto_sync_size
     *    parameter when calling NDBFS to open the file.
     *
     * 3) In worst case scenarios we get the last of those 1 MByte of pages
     *    written by NDBFS ending up on disk, but no other page.
     *
     * Given these facts we need to search onwards for a page with a current
     * log lap for at least 1 MByte plus the maximum skip size at MByte change
     * until we stop the search and decide we actually found the last page with
     * the current log lap number. The maximum skip size at normal MByte change
     * is equal to the largest size of a REDO log record. At the moment this
     * should never be bigger than one REDO log page. But this can and will
     * most likely change in the future.
     *
     * As an additional safety measure and to align the algorithm searching
     * for the first page to start invalidate from (invalidation happens
     * in a backward fashion) and the algorithm searching for the log end
     * at restart, we will not stop the search until we have found an
     * unwritten page 0 of a MByte (or page 1 for the first MByte in a file).
     *
     * This means that if we check one entire MByte from the first unwritten
     * page we find then we are always safe that we have found the end of the
     * REDO log.
     *
     * If we don't invalidate all REDO log pages that are invalid we can
     * easily run into problems in later restarts by connecting new log
     * pages to old invalid log pages which makes the finding of the start
     * and end of log impossible.
     */

    // Single-pass do/while(0): 'break' means "search finished, start
    // writing"; reaching the end of the body means "read the next page".
    do
    {
      if (logPagePtr.p->logPageWord[ZPOS_LOG_LAP] < logPartPtr.p->logLap)
      {
        /**
         * We have found an old page which has not been written in this log
         * lap. We need however to continue searching for pages to invalidate
         * a bit further. We need to actually find a page 0 within a
         * MByte that is old before we can quit the search and for the first
         * MByte this is actually page 1 since we never invalidate page 0 of
         * a log file.
         *
         * We will still track the old pages to provide some printouts of
         * that this happened so that we can gain better understanding of
         * how the REDO log writing actually works.
         */
        if (!logPartPtr.p->firstInvalidatePageFound)
        {
          // First old page of a run: remember where it is and compute the
          // MByte (two MBytes ahead, wrapped at the file size) at which the
          // search may stop.
          Uint32 firstInvalidMByte = logPartPtr.p->invalidatePageNo /
                                     ZPAGES_IN_MBYTE;
          if ((firstInvalidMByte + 2) >= clogFileSize)
          {
            jam();
            logPartPtr.p->endInvalidMByteSearch =
              (firstInvalidMByte + 2) - clogFileSize;
          }
          else
          {
            jam();
            logPartPtr.p->endInvalidMByteSearch = firstInvalidMByte + 2;
          }
          logPartPtr.p->firstInvalidatePageFound = true;
          logPartPtr.p->firstInvalidatePageNo = logPartPtr.p->invalidatePageNo;
          logPartPtr.p->firstInvalidateFileNo = logPartPtr.p->invalidateFileNo;
        }
        else
        {
          jam();
          // Only the first page of a MByte (page 1 for the first MByte of a
          // file) can terminate the search.
          if (((logPartPtr.p->invalidatePageNo % ZPAGES_IN_MBYTE) == 0) ||
               (logPartPtr.p->invalidatePageNo == 1))
          {
            jam();
            Uint32 currentMByte = logPartPtr.p->invalidatePageNo /
                                  ZPAGES_IN_MBYTE;
            if (currentMByte == logPartPtr.p->endInvalidMByteSearch)
            {
              jam();
              /* No need to search any longer */
              break;
            }
          }
        }
      }
      else
      {
        // A page can never carry a log lap newer than the current one.
        ndbrequire(logPagePtr.p->logPageWord[ZPOS_LOG_LAP] ==
                   logPartPtr.p->logLap);
        if (logPartPtr.p->firstInvalidatePageFound)
        {
          jam();
          // A written page after a run of old pages: the run was not the
          // end of the log, so forget it and report what was seen.
          logPartPtr.p->firstInvalidatePageFound = false;
          g_eventLogger->info("Found a block of unwritten log pages in part %u"
                              ", followed by a written page, First unwritten:"
                              " file: %u, page: %u, Written: file: %u,"
                              " page: %u",
                              logPartPtr.p->logPartNo,
                              logPartPtr.p->firstInvalidateFileNo,
                              logPartPtr.p->firstInvalidatePageNo,
                              logPartPtr.p->invalidateFileNo,
                              logPartPtr.p->invalidatePageNo);
        }
      }
      jam();
      // The end of the search has not been reached yet:
      // release this page and read the next one.
      releaseLfo(signal);
      releaseLogpage(signal);
      readFileInInvalidate(signal, 1);
      return;
    } while (0);

    /**
     * We found the "last" page to invalidate...
     *
     * We now need to start the invalidation from firstInvalidatePageNo - 1.
     * We invalidate backwards to ensure that we make progress even in the
     * presence of multiple restarts.
     *
     * We could however have already stepped into a new file, in this case
     * we will start the invalidation writes from page 1 to ensure that
     * we integrate this stepping ahead nicely with the file change code.
     * This is a harmless side effect since invalidating an invalid page
     * is not a problem.
     *
     * Note that invalidatePageNo is decremented in the fall-through case
     * below before anything is written; with the value 1 it becomes 0 and
     * the file change path (writeFileInInvalidate(signal, 1)) is taken.
     */

    if (logPartPtr.p->invalidateFileNo != logPartPtr.p->firstInvalidateFileNo)
    {
      jam();
      logPartPtr.p->invalidatePageNo = 1;
    }
    else
    {
      jam();
      logPartPtr.p->invalidatePageNo = logPartPtr.p->firstInvalidatePageNo;
    }
    g_eventLogger->info("Start invalidating: Part %u, Head: file: %u,"
                        " page: %u, Invalidation start: file: %u,"
                        " page: %u, actual start invalidate: file: %u"
                        " page: %u",
                        logPartPtr.p->logPartNo,
                        logPartPtr.p->headFileNo,
                        logPartPtr.p->headPageNo,
                        logPartPtr.p->firstInvalidateFileNo,
                        logPartPtr.p->firstInvalidatePageNo - 1,
                        logPartPtr.p->invalidateFileNo,
                        logPartPtr.p->invalidatePageNo - 1);

    // Intentional fall through into the write case...
  case LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES:
    jam();

    // Release the page and operation record of the read/write that just
    // completed.
    releaseLfo(signal);
    releaseLogpage(signal);

    // Step backwards...
    logPartPtr.p->invalidatePageNo--;

    if (logPartPtr.p->invalidatePageNo == 0)
    {
      jam();
      // Page 0 is never invalidated; continue in the previous file.

      if (logFilePtr.p->fileNo == 0)
      {
        jam();
        /**
         * We're wrapping in the log...
         *   update logLap
         */
        logPartPtr.p->logLap--;
	ndbrequire(logPartPtr.p->logLap); // Should always be > 0
        if (DEBUG_REDO)
        {
          ndbout_c("invalidateLogAfterLastGCI part: %u wrap from file 0 -> logLap: %u",
                   logPartPtr.p->logPartNo, logPartPtr.p->logLap);
        }
      }

      // If a close of the current file was started, stop here; otherwise
      // step to the previous file right away.
      if (invalidateCloseFile(signal, logPartPtr, logFilePtr,
                              LogFileRecord::CLOSE_SR_WRITE_INVALIDATE_PAGES))
      {
        jam();
        return;
      }
      writeFileInInvalidate(signal, 1); // step prev
      return;
    }
    writeFileInInvalidate(signal, 0);
    return;
  default:
    jamLine(lfoPtr.p->lfoState);
    ndbabort();
  }
}
26184 
26185 void
writeFileInInvalidate(Signal * signal,int stepPrev)26186 Dblqh::writeFileInInvalidate(Signal* signal, int stepPrev)
26187 {
26188   /**
26189    * Move to prev file
26190    */
26191   if (stepPrev == 1)
26192   {
26193     jam();
26194     logFilePtr.i = logFilePtr.p->prevLogFile;
26195     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
26196     logPartPtr.p->invalidateFileNo = logFilePtr.p->fileNo;
26197     logPartPtr.p->invalidatePageNo = clogFileSize * ZPAGES_IN_MBYTE - 1;
26198   }
26199 
26200   if (logPartPtr.p->invalidateFileNo == logPartPtr.p->headFileNo &&
26201       logPartPtr.p->invalidatePageNo == logPartPtr.p->headPageNo)
26202   {
26203     jam();
26204     /**
26205      * Done...
26206      */
26207     logFilePtr.i = logPartPtr.p->currentLogfile;
26208     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
26209 
26210     logFilePtr.i = logFilePtr.p->nextLogFile;
26211     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
26212 
26213     exitFromInvalidate(signal);
26214     return;
26215   }
26216 
26217   if (stepPrev == 1 && logFilePtr.p->logFileStatus != LogFileRecord::OPEN)
26218   {
26219     jam();
26220     if (DEBUG_REDO)
26221     {
26222       ndbout_c("invalidate part: %u open for write %u",
26223                logPartPtr.p->logPartNo, logFilePtr.p->fileNo);
26224     }
26225     logFilePtr.p->logFileStatus =LogFileRecord::OPEN_SR_WRITE_INVALIDATE_PAGES;
26226     openFileRw(signal, logFilePtr);
26227     return;
26228   }
26229 
26230   seizeLogpage(signal);
26231 
26232   bool sync = false;
26233   const bool isLastPageToInvalidateInPart =
26234     ((logPartPtr.p->invalidatePageNo - 1) == logPartPtr.p->headPageNo) &&
26235      (logPartPtr.p->invalidateFileNo == logPartPtr.p->headFileNo);
26236   const bool isLastPageToInvalidateInFile =
26237     logPartPtr.p->invalidatePageNo == 1;
26238   const bool isLastPageToInvalidateInMByte =
26239     (logPartPtr.p->invalidatePageNo % ZPAGES_IN_MBYTE) == 0;
26240   if (isLastPageToInvalidateInPart ||
26241       isLastPageToInvalidateInFile ||
26242       isLastPageToInvalidateInMByte)
26243   {
26244     /**
26245      * In some cases we could end up with thousands of log pages to
26246      * to invalidate, to speed up this processing, only sync at file
26247      * switch, at the last page to write in a MByte. This will decrease
26248      * invalidation times in those cases from minutes to seconds.
26249      *
26250      * We keep the maximum size of invalidation writes to 1MByte and at
26251      * boundaries of MBytes to ensure that we don't allow for ways to
26252      * increase the set of unwritten pages in multiple restarts.
26253      */
26254     jam();
26255     sync = true;
26256   }
26257   /**
26258    * Make page really empty
26259    */
26260   bzero(logPagePtr.p, sizeof(LogPageRecord));
26261   writeSinglePage(signal,
26262                   logPartPtr.p->invalidatePageNo,
26263                   ZPAGE_SIZE - 1,
26264                   __LINE__,
26265                   sync);
26266 
26267   lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES;
26268   return;
26269 }//Dblqh::invalidateLogAfterLastGCI
26270 
26271 bool
invalidateCloseFile(Signal * signal,Ptr<LogPartRecord> partPtr,Ptr<LogFileRecord> filePtr,LogFileRecord::LogFileStatus status)26272 Dblqh::invalidateCloseFile(Signal* signal,
26273                            Ptr<LogPartRecord> partPtr,
26274                            Ptr<LogFileRecord> filePtr,
26275                            LogFileRecord::LogFileStatus status)
26276 {
26277   jam();
26278   if (filePtr.p->fileNo != 0 &&
26279       filePtr.i != partPtr.p->currentLogfile &&
26280       filePtr.i != nextLogFilePtr(logPartPtr.p->currentLogfile))
26281   {
26282     jam();
26283     if (DEBUG_REDO)
26284     {
26285       ndbout_c("invalidate part: %u close %u(%u) state: %u (%u)",
26286                logPartPtr.p->logPartNo,
26287                logFilePtr.p->fileNo,
26288                logFilePtr.i,
26289                (Uint32)status,
26290                logPartPtr.p->currentLogfile);
26291     }
26292     filePtr.p->logFileStatus = status;
26293     closeFile(signal, filePtr, __LINE__);
26294     return true;
26295   }
26296   return false;
26297 }
26298 
/**
 * Issue the next page read of the invalidation search.  The work is split
 * into numbered steps so that it can be resumed after a file has been
 * closed or opened; steps without an early return continue into the
 * following step.
 *
 * @param signal    signal object used for file operations
 * @param stepNext  which step to perform:
 *   0  read the page at invalidatePageNo (READ_SR_INVALIDATE_PAGES)
 *   1  advance one page and read it; at the end of the file close the file
 *      if possible, otherwise continue with step 2
 *   2  move to page 1 of the next file (incrementing logLap when wrapping
 *      to file 0), open the file if needed, otherwise read the page
 *   3  file search: close the current file if possible, otherwise step 4
 *   4  file search: move to page 1 of the next file, open it if needed,
 *      otherwise step 5
 *   5  file search: read the page (READ_SR_INVALIDATE_SEARCH_FILES)
 *   6  end of file search: close the current file if possible, otherwise
 *      step 7
 *   7  move back to the previous file (decrementing logLap when leaving
 *      file 0) and start the page by page search there
 *   Any other value aborts.
 *
 * Steps 2, 4, 5 and 7 are presumably also entered directly from the file
 * open/close completion handlers - those callers are outside this section.
 */
void Dblqh::readFileInInvalidate(Signal* signal, int stepNext)
{
  jam();

  if (DEBUG_REDO)
  {
    ndbout_c("readFileInInvalidate part: %u file: %u stepNext: %u",
             logPartPtr.p->logPartNo, logFilePtr.p->fileNo, stepNext);
  }

  if (stepNext == 0)
  {
    jam();
    // Contact NDBFS. Real time break.
    readSinglePage(signal, logPartPtr.p->invalidatePageNo);
    lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_INVALIDATE_PAGES;
    return;
  }

  if (stepNext == 1)
  {
    jam();
    logPartPtr.p->invalidatePageNo++;
    if (logPartPtr.p->invalidatePageNo == (clogFileSize * ZPAGES_IN_MBYTE))
    {
      // Stepped past the last page of the file.
      if (invalidateCloseFile(signal, logPartPtr, logFilePtr,
                              LogFileRecord::CLOSE_SR_READ_INVALIDATE_PAGES))
      {
        jam();
        // A close was started; stop here.
        return;
      }
      else
      {
        jam();
        stepNext = 2; // After close
      }
    }
    else
    {
      jam();
      // Contact NDBFS. Real time break.
      readSinglePage(signal, logPartPtr.p->invalidatePageNo);
      lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_INVALIDATE_PAGES;
      return;
    }
  }

  if (stepNext == 2)
  {
    jam();
    // We continue in the next file.
    logFilePtr.i = logFilePtr.p->nextLogFile;
    ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
    logPartPtr.p->invalidateFileNo = logFilePtr.p->fileNo;
    // Page 0 is used for file descriptors.
    logPartPtr.p->invalidatePageNo = 1;

    if (logFilePtr.p->fileNo == 0)
    {
      /**
       * We're wrapping in the log...
       *   update logLap
       */
      logPartPtr.p->logLap++;
      if (DEBUG_REDO)
      {
        ndbout_c("readFileInInvalidate part: %u step: %u wrap to file 0 -> logLap: %u",
                 logPartPtr.p->logPartNo, stepNext, logPartPtr.p->logLap);
      }
    }

    // Also the jump target of step 7 when the previous file is not the
    // head file: open the file if needed, then read invalidatePageNo.
stepNext_2:
    if (logFilePtr.p->logFileStatus != LogFileRecord::OPEN)
    {
      jam();
      if (DEBUG_REDO)
      {
        ndbout_c("invalidate part: %u step: %u open for read %u",
                 logPartPtr.p->logPartNo, stepNext, logFilePtr.p->fileNo);
      }
      logFilePtr.p->logFileStatus =LogFileRecord::OPEN_SR_READ_INVALIDATE_PAGES;
      openFileRw(signal, logFilePtr);
      return;
    }

    // Contact NDBFS. Real time break.
    readSinglePage(signal, logPartPtr.p->invalidatePageNo);
    lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_INVALIDATE_PAGES;
    return;
  }

  if (stepNext == 3)
  {
    jam();
    // File search: close the file just inspected unless it must stay open.
    if (invalidateCloseFile
        (signal, logPartPtr, logFilePtr,
         LogFileRecord::CLOSE_SR_READ_INVALIDATE_SEARCH_FILES))
    {
      jam();
      return;
    }
    stepNext = 4;
  }

  if (stepNext == 4)
  {
    jam();
    // File search: position on page 1 of the next file.
    logFilePtr.i = logFilePtr.p->nextLogFile;
    ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
    logPartPtr.p->invalidateFileNo = logFilePtr.p->fileNo;
    // Page 0 is used for file descriptors.
    logPartPtr.p->invalidatePageNo = 1;

    if (logFilePtr.p->fileNo == 0)
    {
      /**
       * We're wrapping in the log...
       *   update logLap
       */
      logPartPtr.p->logLap++;
      if (DEBUG_REDO)
      {
        ndbout_c("readFileInInvalidate part: %u step: %u wrap to file 0 -> logLap: %u",
                 logPartPtr.p->logPartNo, stepNext, logPartPtr.p->logLap);
      }
    }

    if (logFilePtr.p->logFileStatus != LogFileRecord::OPEN)
    {
      jam();
      if (DEBUG_REDO)
      {
        ndbout_c("invalidate part: %u step: %u open for read %u",
                 logPartPtr.p->logPartNo, stepNext, logFilePtr.p->fileNo);
      }
      logFilePtr.p->logFileStatus =
        LogFileRecord::OPEN_SR_READ_INVALIDATE_SEARCH_FILES;
      openFileRw(signal, logFilePtr);
      return;
    }
    stepNext = 5;
  }

  if (stepNext == 5)
  {
    jam();
    // Contact NDBFS. Real time break.
    readSinglePage(signal, logPartPtr.p->invalidatePageNo);
    lfoPtr.p->lfoState =
      LogFileOperationRecord::READ_SR_INVALIDATE_SEARCH_FILES;
    return;
  }

  if (stepNext == 6)
  {
    jam();
    // The file search went one file too far: close that file unless it
    // must stay open, then go back one file in step 7.
    if (invalidateCloseFile
        (signal, logPartPtr, logFilePtr,
         LogFileRecord::CLOSE_SR_READ_INVALIDATE_SEARCH_LAST_FILE))
    {
      jam();
      return;
    }
    stepNext = 7;
  }

  if (stepNext == 7)
  {
    jam();

    if (logFilePtr.p->fileNo == 0)
    {
      jam();
      /**
       * We're wrapping in the log...
       *   update logLap
       */
      logPartPtr.p->logLap--;
      ndbrequire(logPartPtr.p->logLap); // Should always be > 0
      if (DEBUG_REDO)
      {
        ndbout_c("invalidateLogAfterLastGCI part: %u step: %u wrap from file 0 -> logLap: %u",
                 logPartPtr.p->logPartNo, stepNext, logPartPtr.p->logLap);
      }
    }

    logFilePtr.i = logFilePtr.p->prevLogFile;
    ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);

    logPartPtr.p->invalidateFileNo = logFilePtr.p->fileNo;
    // Page 0 is used for file descriptors.
    logPartPtr.p->invalidatePageNo = 1;

    if (logPartPtr.p->invalidateFileNo == logPartPtr.p->headFileNo)
    {
      jam();
      // Back in the head file: the page by page search starts right after
      // the head page (step 1 below increments before reading).
      logPartPtr.p->invalidatePageNo = logPartPtr.p->headPageNo;

      if (! ((cstartType == NodeState::ST_INITIAL_START) ||
             (cstartType == NodeState::ST_INITIAL_NODE_RESTART)))
      {
        jam();
        if (logFilePtr.i == logPartPtr.p->lastLogfile)
        {
          jam();
          // In the part's last log file the search start is moved forward
          // to the first page of lastMbyte when the head page lies before
          // it.
          Uint32 lastMbytePageNo =
            logPartPtr.p->lastMbyte << ZTWOLOG_NO_PAGES_IN_MBYTE;
          if (logPartPtr.p->invalidatePageNo < lastMbytePageNo)
          {
            jam();
            if (DEBUG_REDO)
            {
              ndbout_c("readFileInInvalidate part: %u step: %u moving invalidatePageNo from %u to %u (lastMbyte)",
                       logPartPtr.p->logPartNo, stepNext,
                       logPartPtr.p->invalidatePageNo,
                       lastMbytePageNo);
            }
            logPartPtr.p->invalidatePageNo = lastMbytePageNo;
          }
        }
      }
      readFileInInvalidate(signal, 1);
      return;
    }

    // Not the head file: open the file if needed and read page 1.
    goto stepNext_2;
  }
  // Unknown step value.
  ndbabort();
}
26528 
exitFromInvalidate(Signal * signal)26529 void Dblqh::exitFromInvalidate(Signal* signal)
26530 {
26531   jam();
26532 
26533   if (DEBUG_REDO)
26534   {
26535     jam();
26536     printf("exitFromInvalidate part: %u head file: %u page: %u open: ",
26537            logPartPtr.p->logPartNo,
26538            logPartPtr.p->headFileNo,
26539            logPartPtr.p->headPageNo);
26540 
26541     LogFileRecordPtr tmp;
26542     tmp.i = logPartPtr.p->currentLogfile;
26543     do
26544     {
26545       jam();
26546       ptrCheckGuard(tmp, clogFileFileSize, logFileRecord);
26547       if (tmp.p->logFileStatus != LogFileRecord::LFS_IDLE &&
26548           tmp.p->logFileStatus != LogFileRecord::CLOSED)
26549       {
26550         jam();
26551         printf("%u ", tmp.p->fileNo);
26552       }
26553       tmp.i = tmp.p->nextLogFile;
26554     } while (tmp.i != logPartPtr.p->currentLogfile && tmp.i != RNIL);
26555     printf("\n");
26556 
26557     tmp.i = logPartPtr.p->currentLogfile;
26558     ptrCheckGuard(tmp, clogFileFileSize, logFileRecord);
26559 
26560     LogPosition head = { tmp.p->fileNo, tmp.p->currentMbyte };
26561     LogPosition tail = { logPartPtr.p->logTailFileNo,
26562                          logPartPtr.p->logTailMbyte};
26563     Uint64 mb = free_log(head, tail, logPartPtr.p->noLogFiles, clogFileSize);
26564     Uint64 total = logPartPtr.p->noLogFiles * Uint64(clogFileSize);
26565     ndbout_c("head: [ %u %u ] tail: [ %u %u ] free: %llu total: %llu",
26566              head.m_file_no, head.m_mbyte,
26567              tail.m_file_no, tail.m_mbyte,
26568              mb, total);
26569   }
26570 
26571   logFilePtr.i = logPartPtr.p->firstLogfile;
26572   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
26573   logPagePtr.i = logFilePtr.p->logPageZero;
26574   ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
26575   logPagePtr.p->logPageWord[ZPAGE_HEADER_SIZE + ZPOS_FILE_NO] =
26576     logPartPtr.p->headFileNo;
26577   writeSinglePage(signal, 0, ZPAGE_SIZE - 1, __LINE__);
26578 
26579   lfoPtr.p->logFileRec = logFilePtr.i;
26580   lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_SR_INVALIDATE_PAGES_UPDATE_PAGE0;
26581   return;
26582 }
26583 
26584 /*---------------------------------------------------------------------------*/
26585 /* THE EXECUTION OF A LOG RECORD IS COMPLETED. RELEASE PAGES IF THEY WERE    */
26586 /* READ FROM DISK FOR THIS PARTICULAR OPERATION.                             */
26587 /*---------------------------------------------------------------------------*/
completedLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)26588 void Dblqh::completedLab(Signal* signal, const TcConnectionrecPtr tcConnectptr)
26589 {
26590   Uint32 result = returnExecLog(signal, tcConnectptr);
26591 /*---------------------------------------------------------------------------*/
26592 /*       ENTER COMPLETED WITH                                                */
26593 /*         LQH_CONNECTPTR                                                    */
26594 /*---------------------------------------------------------------------------*/
26595   if (result == ZOK) {
26596     jam();
26597     execLogRecord(signal);
26598     return;
26599   } else if (result == ZNOT_OK) {
26600     jam();
26601     signal->theData[0] = ZEXEC_SR;
26602     signal->theData[1] = logPartPtr.i;
26603     sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
26604   } else {
26605     jam();
26606     /*empty*/;
26607   }//if
26608 /*---------------------------------------------------------------------------*/
26609 /* WE HAVE TO WAIT FOR CLOSING OF THE EXECUTED LOG FILE BEFORE PROCEEDING IN */
26610 /* RARE CASES.                                                               */
26611 /*---------------------------------------------------------------------------*/
26612   return;
26613 }//Dblqh::completedLab()
26614 
26615 /*---------------------------------------------------------------------------*/
26616 /* EXECUTION OF LOG RECORD WAS NOT SUCCESSFUL. CHECK IF IT IS OK ANYWAY,     */
26617 /* THEN EXECUTE THE NEXT LOG RECORD.                                         */
26618 /*---------------------------------------------------------------------------*/
logLqhkeyrefLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)26619 void Dblqh::logLqhkeyrefLab(Signal* signal,
26620                             const TcConnectionrecPtr tcConnectptr)
26621 {
26622   Uint32 result = returnExecLog(signal, tcConnectptr);
26623   switch (tcConnectptr.p->operation) {
26624   case ZUPDATE:
26625   case ZDELETE:
26626     jam();
26627     if (unlikely(terrorCode != ZNO_TUPLE_FOUND))
26628       goto error;
26629     break;
26630   case ZINSERT:
26631     jam();
26632     if (unlikely(terrorCode != ZTUPLE_ALREADY_EXIST && terrorCode != 899))
26633       goto error;
26634 
26635     break;
26636   default:
26637     goto error;
26638   }
26639 
26640   if (result == ZOK) {
26641     jam();
26642     execLogRecord(signal);
26643     return;
26644   } else if (result == ZNOT_OK) {
26645     jam();
26646     signal->theData[0] = ZEXEC_SR;
26647     signal->theData[1] = logPartPtr.i;
26648     sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
26649   } else {
26650     jam();
26651     /*empty*/;
26652   }//if
26653   /* ------------------------------------------------------------------------
26654    *  WE HAVE TO WAIT FOR CLOSING OF THE EXECUTED LOG FILE BEFORE
26655    *  PROCEEDING IN RARE CASES.
26656    * ----------------------------------------------------------------------- */
26657   return;
26658 error:
26659   BaseString tmp;
26660   tmp.appfmt("You have found a bug!"
26661 	     " Failed op (%s) during REDO table: %d fragment: %d err: %d",
26662 	     tcConnectptr.p->operation == ZINSERT ? "INSERT" :
26663 	     tcConnectptr.p->operation == ZUPDATE ? "UPDATE" :
26664 	     tcConnectptr.p->operation == ZDELETE ? "DELETE" :
26665 	     tcConnectptr.p->operation == ZWRITE ? "WRITE" : "<unknown>",
26666 	     tcConnectptr.p->tableref,
26667 	     tcConnectptr.p->fragmentid,
26668 	     terrorCode);
26669   progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR,
26670 	    tmp.c_str());
26671 }//Dblqh::logLqhkeyrefLab()
26672 
closeExecSrCompletedLab(Signal * signal)26673 void Dblqh::closeExecSrCompletedLab(Signal* signal)
26674 {
26675   logFilePtr.p->logFileStatus = LogFileRecord::CLOSED;
26676   signal->theData[0] = logFilePtr.p->logPartRec;
26677   execLogComp(signal);
26678   return;
26679 }//Dblqh::closeExecSrCompletedLab()
26680 
/* --------------------------------------------------------------------------
 *  ONE OF THE LOG PARTS HAS COMPLETED EXECUTING THE LOG. CHECK IF ALL LOG
 *  PARTS ARE COMPLETED. IF SO START SENDING EXEC_FRAGCONF AND EXEC_SRCONF.
 * ------------------------------------------------------------------------- */
execLogComp(Signal * signal)26685 void Dblqh::execLogComp(Signal* signal)
26686 {
26687   logPartPtr.i = signal->theData[0];
26688   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
26689   logPartPtr.p->logPartState = LogPartRecord::SR_THIRD_PHASE_COMPLETED;
26690   /* ------------------------------------------------------------------------
26691    *  WE MUST RELEASE THE TC CONNECT RECORD HERE SO THAT IT CAN BE REUSED.
26692    * ----------------------------------------------------------------------- */
26693   TcConnectionrecPtr tcConnectptr;
26694   tcConnectptr.i = logPartPtr.p->logTcConrec;
26695   ndbrequire(tcConnect_pool.getValidPtr(tcConnectptr));
26696   logPartPtr.p->logTcConrec = RNIL;
26697   releaseTcrecLog(signal, tcConnectptr);
26698   g_eventLogger->info("LDM(%u): Completed REDO log execution on"
26699                       " part %u, ops executed = %llu, bytes executed = %llu,",
26700                       instance(),
26701                       logPartPtr.p->logPartNo,
26702                       logPartPtr.p->m_redoWorkStats.m_opsExecuted,
26703                       logPartPtr.p->m_redoWorkStats.m_bytesExecuted);
26704   g_eventLogger->info("LDM(%u): Log part %u stats:"
26705                       " ops skipped = %llu, ops prepared %llu,"
26706                       " pages read = %llu,"
26707                       " GCIs executed = %u",
26708                       instance(),
26709                       logPartPtr.p->logPartNo,
26710                       logPartPtr.p->m_redoWorkStats.m_opsSkipped,
26711                       logPartPtr.p->m_redoWorkStats.m_opsPrepared,
26712                       logPartPtr.p->m_redoWorkStats.m_pagesRead,
26713                       logPartPtr.p->m_redoWorkStats.m_gcisExecuted);
26714   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
26715     jam();
26716     ptrAss(logPartPtr, logPartRecord);
26717     if (logPartPtr.p->logPartState != LogPartRecord::SR_THIRD_PHASE_COMPLETED) {
26718       if (logPartPtr.p->logPartState != LogPartRecord::SR_THIRD_PHASE_STARTED) {
26719         jam();
26720         systemErrorLab(signal, __LINE__);
26721         return;
26722       } else {
26723         jam();
26724 	/* ------------------------------------------------------------------
26725 	 *  THIS LOG PART WAS NOT COMPLETED YET. EXIT AND WAIT FOR IT
26726 	 *  TO COMPLETE
26727 	 * ----------------------------------------------------------------- */
26728         return;
26729       }//if
26730     }//if
26731   }//for
26732   /* ------------------------------------------------------------------------
26733    *   ALL LOG PARTS HAVE COMPLETED THE EXECUTION OF THE LOG. WE CAN NOW START
26734    *   SENDING THE EXEC_FRAGCONF SIGNALS TO ALL INVOLVED FRAGMENTS.
26735    * ----------------------------------------------------------------------- */
26736   jam();
26737 
26738 #ifndef NO_REDO_PAGE_CACHE
26739   release(m_redo_page_cache);
26740 #endif
26741 
26742 #ifndef NO_REDO_OPEN_FILE_CACHE
26743   release(signal, m_redo_open_file_cache);
26744 #else
26745   execLogComp_extra_files_closed(signal);
26746 #endif
26747 }
26748 
26749 void
execLogComp_extra_files_closed(Signal * signal)26750 Dblqh::execLogComp_extra_files_closed(Signal * signal)
26751 {
26752   c_lcp_complete_fragments.first(fragptr);
26753   signal->theData[0] = ZSEND_EXEC_CONF;
26754   signal->theData[1] = fragptr.i;
26755   sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
26756   return;
26757 }
26758 
/* --------------------------------------------------------------------------
 *  GO THROUGH THE FRAGMENT RECORDS TO DEDUCE WHICH OF THEM SHALL BE SENT
 *  EXEC_FRAGCONF AFTER COMPLETING THE EXECUTION OF THE LOG.
 * ------------------------------------------------------------------------- */
sendExecConf(Signal * signal)26763 void Dblqh::sendExecConf(Signal* signal)
26764 {
26765   jamEntry();
26766   fragptr.i = signal->theData[0];
26767   Uint32 loopCount = 0;
26768   while (fragptr.i != RNIL) {
26769     c_lcp_complete_fragments.getPtr(fragptr);
26770     Uint32 next = fragptr.p->nextList;
26771     if (fragptr.p->execSrStatus != Fragrecord::IDLE) {
26772       jam();
26773       ndbrequire(fragptr.p->execSrNoReplicas - 1 < MAX_REPLICAS);
26774       for (Uint32 i = 0; i < fragptr.p->execSrNoReplicas; i++) {
26775         jam();
26776         Uint32 ref = fragptr.p->execSrBlockref[i];
26777         signal->theData[0] = fragptr.p->execSrUserptr[i];
26778 
26779         if (isNdbMtLqh())
26780         {
26781           jam();
26782           // send via own proxy
26783           signal->theData[1] = ref;
26784           sendSignal(DBLQH_REF, GSN_EXEC_FRAGCONF, signal, 2, JBB);
26785         }
26786         else if (refToInstance(ref) != 0)
26787         {
26788           jam();
26789           // send via remote proxy
26790           signal->theData[1] = ref;
26791           sendSignal(numberToRef(refToMain(ref), refToNode(ref)),
26792                      GSN_EXEC_FRAGCONF, signal, 2, JBB);
26793         }
26794         else
26795         {
26796           jam();
26797           // send direct
26798           sendSignal(ref, GSN_EXEC_FRAGCONF, signal, 1, JBB);
26799         }
26800       }//for
26801       fragptr.p->execSrNoReplicas = 0;
26802     }//if
26803     loopCount++;
26804     if (loopCount > 20) {
26805       jam();
26806       signal->theData[0] = ZSEND_EXEC_CONF;
26807       signal->theData[1] = next;
26808       sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
26809       return;
26810     } else {
26811       jam();
26812       fragptr.i = next;
26813     }//if
26814   }//while
26815   /* ----------------------------------------------------------------------
26816    *  WE HAVE NOW SENT ALL EXEC_FRAGCONF. NOW IT IS TIME TO SEND
26817    *  EXEC_SRCONF TO ALL NODES.
26818    * --------------------------------------------------------------------- */
26819   srPhase3Comp(signal);
26820 }//Dblqh::sendExecConf()
26821 
26822 /* --------------------------------------------------------------------------
26823  *       PHASE 3 HAS NOW COMPLETED. INFORM ALL OTHER NODES OF THIS EVENT.
26824  * ------------------------------------------------------------------------- */
srPhase3Comp(Signal * signal)26825 void Dblqh::srPhase3Comp(Signal* signal)
26826 {
26827   jamEntry();
26828 
26829   g_eventLogger->info("LDM(%u): Completed LDM start phase 3",
26830                       instance());
26831 
26832   signal->theData[0] = cownNodeid;
26833   if (!isNdbMtLqh())
26834   {
26835     jam();
26836     NodeReceiverGroup rg(DBLQH, m_sr_nodes);
26837     sendSignal(rg, GSN_EXEC_SRCONF, signal, 1, JBB);
26838   }
26839   else
26840   {
26841     jam();
26842     const Uint32 sz = NdbNodeBitmask::Size;
26843     m_sr_nodes.copyto(sz, &signal->theData[1]);
26844     sendSignal(DBLQH_REF, GSN_EXEC_SRCONF, signal, 1 + sz, JBB);
26845   }
26846   return;
26847 }//Dblqh::srPhase3Comp()
26848 
26849 /* ##########################################################################
26850  *    SYSTEM RESTART PHASE FOUR MODULE
26851  *    THIS MODULE IS A SUB-MODULE OF THE FILE SYSTEM HANDLING.
26852  *
26853  *    THIS MODULE SETS UP THE HEAD AND TAIL POINTERS OF THE LOG PARTS IN THE
26854  *    FRAGMENT LOG. WHEN IT IS COMPLETED IT REPORTS TO THE MASTER DIH THAT
26855  *    IT HAS COMPLETED THE PART OF THE SYSTEM RESTART WHERE THE DATABASE IS
26856  *    LOADED.
26857  *    IT ALSO OPENS THE CURRENT LOG FILE AND THE NEXT AND SETS UP THE FIRST
26858  *    LOG PAGE WHERE NEW LOG DATA IS TO BE INSERTED WHEN THE SYSTEM STARTS
26859  *    AGAIN.
26860  *
26861  *    THIS PART IS ACTUALLY EXECUTED FOR ALL RESTART TYPES.
26862  * ######################################################################### */
initFourth(Signal * signal)26863 void Dblqh::initFourth(Signal* signal)
26864 {
26865   LogFileRecordPtr locLogFilePtr;
26866   jamEntry();
26867   logPartPtr.i = signal->theData[0];
26868   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
26869   crestartNewestGci = ZUNDEFINED_GCI_LIMIT;
26870   crestartOldestGci = ZUNDEFINED_GCI_LIMIT;
26871   /* ------------------------------------------------------------------------
26872    *       INITIALISE LOG PART AND LOG FILES AS NEEDED.
26873    * ----------------------------------------------------------------------- */
26874   logPartPtr.p->headFileNo = 0;
26875   logPartPtr.p->headPageNo = 1;
26876   logPartPtr.p->headPageIndex = ZPAGE_HEADER_SIZE + 2;
26877   logPartPtr.p->logPartState = LogPartRecord::SR_FOURTH_PHASE_STARTED;
26878   logPartPtr.p->logTailFileNo = 0;
26879   logPartPtr.p->logTailMbyte = 0;
26880   locLogFilePtr.i = logPartPtr.p->firstLogfile;
26881   ptrCheckGuard(locLogFilePtr, clogFileFileSize, logFileRecord);
26882   locLogFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_FOURTH_PHASE;
26883   openFileRw(signal, locLogFilePtr);
26884   return;
26885 }//Dblqh::initFourth()
26886 
openSrFourthPhaseLab(Signal * signal)26887 void Dblqh::openSrFourthPhaseLab(Signal* signal)
26888 {
26889   /* ------------------------------------------------------------------------
26890    *  WE HAVE NOW OPENED THE HEAD LOG FILE WE WILL NOW START READING IT
26891    *  FROM THE HEAD MBYTE TO FIND THE NEW HEAD OF THE LOG.
26892    * ----------------------------------------------------------------------- */
26893   readSinglePage(signal, logPartPtr.p->headPageNo);
26894   lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_FOURTH_PHASE;
26895   return;
26896 }//Dblqh::openSrFourthPhaseLab()
26897 
readSrFourthPhaseLab(Signal * signal)26898 void Dblqh::readSrFourthPhaseLab(Signal* signal)
26899 {
26900   if(c_diskless){
26901     jam();
26902     logPagePtr.p->logPageWord[ZPOS_LOG_LAP] = 1;
26903   }
26904 
26905   /* ------------------------------------------------------------------------
26906    *  INITIALISE ALL LOG PART INFO AND LOG FILE INFO THAT IS NEEDED TO
26907    *  START UP THE SYSTEM.
26908    * ------------------------------------------------------------------------
26909    *  INITIALISE THE NEWEST GLOBAL CHECKPOINT IDENTITY AND THE NEWEST
26910    *  COMPLETED GLOBAL CHECKPOINT IDENITY AS THE NEWEST THAT WAS RESTARTED.
26911    * ------------------------------------------------------------------------
26912    *  INITIALISE THE HEAD PAGE INDEX IN THIS PAGE.
26913    *  ASSIGN IT AS THE CURRENT LOGPAGE.
26914    *  ASSIGN THE FILE AS THE CURRENT LOG FILE.
26915    *  ASSIGN THE CURRENT FILE NUMBER FROM THE CURRENT LOG FILE AND THE NEXT
26916    *  FILE NUMBER FROM THE NEXT LOG FILE.
26917    *  ASSIGN THE CURRENT FILEPAGE FROM HEAD PAGE NUMBER.
26918    *  ASSIGN THE CURRENT MBYTE BY DIVIDING PAGE NUMBER BY 128.
26919    *  INITIALISE LOG LAP TO BE THE LOG LAP AS FOUND IN THE HEAD PAGE.
26920    *  WE HAVE TO CALCULATE THE NUMBER OF REMAINING WORDS IN THIS MBYTE.
26921    * ----------------------------------------------------------------------- */
26922   Uint32 gci = crestartNewestGci;
26923   if (crestartOldestGci > gci)
26924   {
26925     jam();
26926     /**
26927      * If "keepGci" is bigger than latest-completed-gci
26928      *   move cnewest/cnewestCompletedGci forward
26929      */
26930     ndbout_c("readSrFourthPhaseLab: gci %u => %u",
26931              gci, crestartOldestGci);
26932     gci = crestartOldestGci;
26933   }
26934   cnewestGci = gci;
26935   cnewestCompletedGci = gci;
26936   logPartPtr.p->logPartNewestCompletedGCI = cnewestCompletedGci;
26937   logPartPtr.p->currentLogfile = logFilePtr.i;
26938   logFilePtr.p->filePosition = logPartPtr.p->headPageNo;
26939   logFilePtr.p->currentMbyte =
26940                   logPartPtr.p->headPageNo >> ZTWOLOG_NO_PAGES_IN_MBYTE;
26941   logFilePtr.p->fileChangeState = LogFileRecord::NOT_ONGOING;
26942   logPartPtr.p->logLap = logPagePtr.p->logPageWord[ZPOS_LOG_LAP];
26943   logFilePtr.p->currentFilepage = logPartPtr.p->headPageNo;
26944   logFilePtr.p->currentLogpage = logPagePtr.i;
26945 
26946   initLogpage(signal);
26947   logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPartPtr.p->headPageIndex;
26948   logFilePtr.p->remainingWordsInMbyte =
26949     ((
26950       ((logFilePtr.p->currentMbyte + 1) * ZPAGES_IN_MBYTE) -
26951      logFilePtr.p->currentFilepage) *
26952     (ZPAGE_SIZE - ZPAGE_HEADER_SIZE)) -
26953       (logPartPtr.p->headPageIndex - ZPAGE_HEADER_SIZE);
26954   /* ------------------------------------------------------------------------
26955    *     THE NEXT STEP IS TO OPEN THE NEXT LOG FILE (IF THERE IS ONE).
26956    * ----------------------------------------------------------------------- */
26957   if (logFilePtr.p->nextLogFile != logFilePtr.i) {
26958     LogFileRecordPtr locLogFilePtr;
26959     jam();
26960     locLogFilePtr.i = logFilePtr.p->nextLogFile;
26961     ptrCheckGuard(locLogFilePtr, clogFileFileSize, logFileRecord);
26962     locLogFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_FOURTH_NEXT;
26963     openFileRw(signal, locLogFilePtr);
26964   } else {
26965     jam();
26966     /* ----------------------------------------------------------------------
26967      *  THIS CAN ONLY OCCUR IF WE HAVE ONLY ONE LOG FILE. THIS LOG FILE MUST
26968      *  BE LOG FILE ZERO AND THAT IS THE FILE WE CURRENTLY HAVE READ.
26969      *  THUS WE CAN CONTINUE IMMEDIATELY TO READ PAGE ZERO IN FILE ZERO.
26970      * --------------------------------------------------------------------- */
26971     openSrFourthZeroSkipInitLab(signal);
26972     return;
26973   }//if
26974   return;
26975 }//Dblqh::readSrFourthPhaseLab()
26976 
openSrFourthNextLab(Signal * signal)26977 void Dblqh::openSrFourthNextLab(Signal* signal)
26978 {
26979   /* ------------------------------------------------------------------------
26980    *       WE MUST ALSO HAVE FILE 0 OPEN ALL THE TIME.
26981    * ----------------------------------------------------------------------- */
26982   logFilePtr.i = logPartPtr.p->firstLogfile;
26983   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
26984   if (logFilePtr.p->logFileStatus == LogFileRecord::OPEN) {
26985     jam();
26986     openSrFourthZeroSkipInitLab(signal);
26987     return;
26988   } else {
26989     jam();
26990     logFilePtr.p->logFileStatus = LogFileRecord::OPEN_SR_FOURTH_ZERO;
26991     openFileRw(signal, logFilePtr);
26992   }//if
26993   return;
26994 }//Dblqh::openSrFourthNextLab()
26995 
openSrFourthZeroLab(Signal * signal)26996 void Dblqh::openSrFourthZeroLab(Signal* signal)
26997 {
26998   openSrFourthZeroSkipInitLab(signal);
26999   return;
27000 }//Dblqh::openSrFourthZeroLab()
27001 
openSrFourthZeroSkipInitLab(Signal * signal)27002 void Dblqh::openSrFourthZeroSkipInitLab(Signal* signal)
27003 {
27004   if (logFilePtr.i == logPartPtr.p->currentLogfile) {
27005     if (logFilePtr.p->currentFilepage == 0) {
27006       jam();
27007       /* -------------------------------------------------------------------
27008        *  THE HEADER PAGE IN THE LOG IS PAGE ZERO IN FILE ZERO.
27009        *  THIS SHOULD NEVER OCCUR.
27010        * ------------------------------------------------------------------- */
27011       systemErrorLab(signal, __LINE__);
27012       return;
27013     }//if
27014   }//if
27015   readSinglePage(signal, 0);
27016   lfoPtr.p->lfoState = LogFileOperationRecord::READ_SR_FOURTH_ZERO;
27017   return;
27018 }//Dblqh::openSrFourthZeroSkipInitLab()
27019 
readSrFourthZeroLab(Signal * signal)27020 void Dblqh::readSrFourthZeroLab(Signal* signal)
27021 {
27022   logFilePtr.p->logPageZero = logPagePtr.i;
27023   // --------------------------------------------------------------------
27024   //   This is moved to invalidateLogAfterLastGCI(), RT453.
27025   //   signal->theData[0] = ZSR_FOURTH_COMP;
27026   //   signal->theData[1] = logPartPtr.i;
27027   //   sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
27028   // --------------------------------------------------------------------
27029 
27030   // Need to invalidate log pages after the head of the log. RT 453. EDTJAMO.
27031   // Set the start of the invalidation.
27032   logFilePtr.i = logPartPtr.p->currentLogfile;
27033   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
27034   logPartPtr.p->invalidateFileNo = logPartPtr.p->headFileNo;
27035   logPartPtr.p->invalidatePageNo = logPartPtr.p->headPageNo;
27036   logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG_INVALIDATE;
27037   logPartPtr.p->firstInvalidatePageFound = false;
27038 
27039   readFileInInvalidate(signal, 3);
27040   return;
27041 }//Dblqh::readSrFourthZeroLab()
27042 
/* --------------------------------------------------------------------------
 *     ONE OF THE LOG PARTS HAS COMPLETED PHASE FOUR OF THE SYSTEM RESTART.
 *     CHECK IF ALL LOG PARTS ARE COMPLETED. IF SO SEND START_RECCONF.
 * ------------------------------------------------------------------------- */
srFourthComp(Signal * signal)27047 void Dblqh::srFourthComp(Signal* signal)
27048 {
27049   jamEntry();
27050   logPartPtr.i = signal->theData[0];
27051   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
27052   logPartPtr.p->logPartState = LogPartRecord::SR_FOURTH_PHASE_COMPLETED;
27053 
27054   g_eventLogger->info("LDM(%u): Completed old Redo head invalidation"
27055                       " on log part %u",
27056                       instance(),
27057                       logPartPtr.p->logPartNo);
27058 
27059   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
27060     jam();
27061     ptrAss(logPartPtr, logPartRecord);
27062     if (logPartPtr.p->logPartState != LogPartRecord::SR_FOURTH_PHASE_COMPLETED) {
27063       if (logPartPtr.p->logPartState != LogPartRecord::SR_FOURTH_PHASE_STARTED) {
27064         jam();
27065         systemErrorLab(signal, __LINE__);
27066         return;
27067       } else {
27068         jam();
27069 	/* ------------------------------------------------------------------
27070 	 *  THIS LOG PART WAS NOT COMPLETED YET.
27071 	 *  EXIT AND WAIT FOR IT TO COMPLETE
27072 	 * ----------------------------------------------------------------- */
27073         return;
27074       }//if
27075     }//if
27076   }//for
27077   /* ------------------------------------------------------------------------
27078    *  ALL LOG PARTS HAVE COMPLETED PHASE FOUR OF THE SYSTEM RESTART.
27079    *  WE CAN NOW SEND START_RECCONF TO THE MASTER DIH IF IT WAS A
27080    *  SYSTEM RESTART. OTHERWISE WE WILL CONTINUE WITH AN INITIAL START.
27081    *  SET LOG PART STATE TO IDLE TO
27082    *  INDICATE THAT NOTHING IS GOING ON IN THE LOG PART.
27083    * ----------------------------------------------------------------------- */
27084   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
27085     ptrAss(logPartPtr, logPartRecord);
27086     logPartPtr.p->logPartState = LogPartRecord::IDLE;
27087   }//for
27088 
27089   c_executing_redo_log = 0;
27090   g_eventLogger->info("LDM(%u): All redo actions complete (apply,"
27091                       " invalidate)",
27092                       instance());
27093 
27094   if ((cstartType == NodeState::ST_INITIAL_START) ||
27095       (cstartType == NodeState::ST_INITIAL_NODE_RESTART)) {
27096     jam();
27097 
27098     ndbrequire(cinitialStartOngoing == ZTRUE);
27099     cinitialStartOngoing = ZFALSE;
27100     cstartRecReq = SRR_REDO_COMPLETE;
27101     checkStartCompletedLab(signal);
27102     return;
27103   } else if ((cstartType == NodeState::ST_NODE_RESTART) ||
27104              (cstartType == NodeState::ST_SYSTEM_RESTART)) {
27105     jam();
27106 
27107     if(cstartType == NodeState::ST_SYSTEM_RESTART)
27108     {
27109       jam();
27110       if (c_lcp_complete_fragments.first(fragptr))
27111       {
27112 	jam();
27113         signal->theData[0] = ZENABLE_EXPAND_CHECK;
27114         signal->theData[1] = fragptr.i;
27115         sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
27116 	return;
27117       }
27118     }
27119 
27120     cstartRecReq = SRR_REDO_COMPLETE; // REDO complete
27121 
27122     rebuildOrderedIndexes(signal, 0);
27123     return;
27124   } else {
27125     ndbabort();
27126   }//if
27127   return;
27128 }//Dblqh::srFourthComp()
27129 
27130 /* ######################################################################### */
27131 /* #######                            ERROR MODULE                   ####### */
27132 /*                                                                           */
27133 /* ######################################################################### */
27134 
27135 /*---------------------------------------------------------------------------*/
27136 /* AN ERROR OCCURRED THAT WE WILL NOT TREAT AS SYSTEM ERROR. MOST OFTEN THIS */
/* WAS CAUSED BY AN ERRONEOUS SIGNAL SENT BY ANOTHER NODE. WE DO NOT WISH TO */
27138 /* CRASH BECAUSE OF FAULTS IN OTHER NODES. THUS WE ONLY REPORT A WARNING.    */
27139 /* THIS IS CURRENTLY NOT IMPLEMENTED AND FOR THE MOMENT WE GENERATE A SYSTEM */
27140 /* ERROR SINCE WE WANT TO FIND FAULTS AS QUICKLY AS POSSIBLE IN A TEST PHASE.*/
27141 /* IN A LATER PHASE WE WILL CHANGE THIS TO BE A WARNING MESSAGE INSTEAD.     */
27142 /*---------------------------------------------------------------------------*/
27143 /*---------------------------------------------------------------------------*/
27144 /*      THIS TYPE OF ERROR SHOULD NOT GENERATE A SYSTEM ERROR IN A PRODUCT   */
27145 /*      RELEASE. THIS IS A TEMPORARY SOLUTION DURING TEST PHASE TO QUICKLY   */
27146 /*      FIND ERRORS. NORMALLY THIS SHOULD GENERATE A WARNING MESSAGE ONTO    */
27147 /*      SOME ERROR LOGGER. THIS WILL LATER BE IMPLEMENTED BY SOME SIGNAL.    */
27148 /*---------------------------------------------------------------------------*/
27149 /* ------ SYSTEM ERROR SITUATIONS ------- */
27150 /*      IN SITUATIONS WHERE THE STATE IS ERRONEOUS OR IF THE ERROR OCCURS IN */
27151 /*      THE COMMIT, COMPLETE OR ABORT PHASE, WE PERFORM A CRASH OF THE AXE VM*/
27152 /*---------------------------------------------------------------------------*/
27153 
systemErrorLab(Signal * signal,int line)27154 void Dblqh::systemErrorLab(Signal* signal, int line)
27155 {
27156   systemError(signal, line);
27157   progError(line, NDBD_EXIT_NDBREQUIRE);
27158 /*************************************************************************>*/
27159 /*       WE WANT TO INVOKE AN IMMEDIATE ERROR HERE SO WE GET THAT BY       */
27160 /*       INSERTING A CERTAIN POINTER OUT OF RANGE.                         */
27161 /*************************************************************************>*/
27162 }//Dblqh::systemErrorLab()
27163 
27164 /* ------- ERROR SITUATIONS ------- */
27165 
aiStateErrorCheckLab(Signal * signal,Uint32 * dataPtr,Uint32 length,const TcConnectionrecPtr tcConnectptr)27166 void Dblqh::aiStateErrorCheckLab(Signal* signal,
27167                                  Uint32* dataPtr,
27168                                  Uint32 length,
27169                                  const TcConnectionrecPtr tcConnectptr)
27170 {
27171   ndbrequire(tcConnectptr.p->abortState != TcConnectionrec::ABORT_IDLE);
27172   if (tcConnectptr.p->transactionState != TcConnectionrec::IDLE) {
27173       jam();
27174 /*************************************************************************>*/
27175 /*       TRANSACTION ABORT IS ONGOING. IT CAN STILL BE A PART OF AN        */
27176 /*       OPERATION THAT SHOULD CONTINUE SINCE THE TUPLE HAS NOT ARRIVED    */
27177 /*       YET. THIS IS POSSIBLE IF ACTIVE CREATION OF THE FRAGMENT IS       */
27178 /*       ONGOING.                                                          */
27179 /*************************************************************************>*/
27180     if (tcConnectptr.p->activeCreat == Fragrecord::AC_IGNORED) {
27181         jam();
27182 /*************************************************************************>*/
27183 /*       ONGOING ABORTS DURING ACTIVE CREATION MUST SAVE THE ATTRIBUTE INFO*/
27184 /*       SO THAT IT CAN BE SENT TO THE NEXT NODE IN THE COMMIT CHAIN. THIS */
27185 /*       IS NEEDED SINCE ALL ABORTS DURING CREATION OF A FRAGMENT ARE NOT  */
27186 /*       REALLY ERRORS. A MISSING TUPLE TO BE UPDATED SIMPLY MEANS THAT    */
27187 /*       IT HASN'T BEEN TRANSFERRED TO THE NEW REPLICA YET.                */
27188 /*************************************************************************>*/
27189 /*************************************************************************>*/
27190 /*       AFTER THIS ERROR THE ABORT MUST BE COMPLETED. TO ENSURE THIS SET  */
27191 /*       ACTIVE CREATION TO FALSE. THIS WILL ENSURE THAT THE ABORT IS      */
27192 /*       COMPLETED.                                                        */
27193 /*************************************************************************>*/
27194       if (saveAttrInfoInSection(dataPtr, length, tcConnectptr.p) == ZOK) {
27195         jam();
27196         if (tcConnectptr.p->transactionState ==
27197             TcConnectionrec::WAIT_AI_AFTER_ABORT) {
27198           if (tcConnectptr.p->currTupAiLen == tcConnectptr.p->totReclenAi) {
27199             jam();
27200 /*************************************************************************>*/
27201 /*       WE WERE WAITING FOR MORE ATTRIBUTE INFO AFTER A SUCCESSFUL ABORT  */
27202 /*       IN ACTIVE CREATION STATE. THE TRANSACTION SHOULD CONTINUE AS IF   */
27203 /*       IT WAS COMMITTED. NOW ALL INFO HAS ARRIVED AND WE CAN CONTINUE    */
27204 /*       WITH NORMAL PROCESSING AS IF THE TRANSACTION WAS PREPARED.        */
27205 /*       SINCE THE FRAGMENT IS UNDER CREATION WE KNOW THAT LOGGING IS      */
27206 /*       DISABLED. WE STILL HAVE TO CATER FOR DIRTY OPERATION OR NOT.      */
27207 /*************************************************************************>*/
27208             tcConnectptr.p->abortState = TcConnectionrec::ABORT_IDLE;
27209             rwConcludedAiLab(signal, tcConnectptr);
27210             return;
27211           } else {
27212             ndbrequire(tcConnectptr.p->currTupAiLen < tcConnectptr.p->totReclenAi);
27213             jam();
27214             return;	/* STILL WAITING FOR MORE ATTRIBUTE INFO */
27215           }//if
27216         }//if
27217       } else {
27218         jam();
27219 /*************************************************************************>*/
27220 /*       AFTER THIS ERROR THE ABORT MUST BE COMPLETED. TO ENSURE THIS SET  */
27221 /*       ACTIVE CREATION TO ABORT. THIS WILL ENSURE THAT THE ABORT IS      */
27222 /*       COMPLETED AND THAT THE ERROR CODE IS PROPERLY SET                 */
27223 /*************************************************************************>*/
27224         tcConnectptr.p->errorCode = terrorCode;
27225         tcConnectptr.p->activeCreat = Fragrecord::AC_NORMAL;
27226         if (tcConnectptr.p->transactionState ==
27227 	    TcConnectionrec::WAIT_AI_AFTER_ABORT) {
27228           jam();
27229 /*************************************************************************>*/
27230 /*       ABORT IS ALREADY COMPLETED. WE NEED TO RESTART IT FROM WHERE IT   */
27231 /*       WAS INTERRUPTED.                                                  */
27232 /*************************************************************************>*/
27233           continueAbortLab(signal, tcConnectptr);
27234           return;
27235         } else {
27236           jam();
27237           return;
27238 /*************************************************************************>*/
27239 // Abort is ongoing. It will complete since we set the activeCreat = AC_NORMAL
27240 /*************************************************************************>*/
27241         }//if
27242       }//if
27243     }//if
27244   }//if
27245 /*************************************************************************>*/
27246 /* TRANSACTION HAVE BEEN ABORTED. THUS IGNORE ALL SIGNALS BELONGING TO IT. */
27247 /*************************************************************************>*/
27248   return;
27249 }//Dblqh::aiStateErrorCheckLab()
27250 
takeOverErrorLab(Signal * signal,const TcConnectionrecPtr tcConnectptr)27251 void Dblqh::takeOverErrorLab(Signal* signal,
27252                              const TcConnectionrecPtr tcConnectptr)
27253 {
27254   terrorCode = ZTAKE_OVER_ERROR;
27255   abortErrorLab(signal, tcConnectptr);
27256   return;
27257 }//Dblqh::takeOverErrorLab()
27258 
27259 /* ##########################################################################
27260  *               TEST MODULE
27261  * ######################################################################### */
27262 #ifdef VM_TRACE
execTESTSIG(Signal * signal)27263 void Dblqh::execTESTSIG(Signal* signal)
27264 {
27265   jamEntry();
27266   Uint32 userpointer = signal->theData[0];
27267   BlockReference userblockref = signal->theData[1];
27268   Uint32 testcase = signal->theData[2];
27269 
27270   signal->theData[0] = userpointer;
27271   signal->theData[1] = cownref;
27272   signal->theData[2] = testcase;
27273   sendSignal(userblockref, GSN_TESTSIG, signal, 25, JBB);
27274   return;
27275 }//Dblqh::execTESTSIG()
27276 
27277 /* *************** */
27278 /*  MEMCHECKREQ  > */
27279 /* *************** */
27280 /* ************************************************************************>>
27281  * THIS SIGNAL IS PURELY FOR TESTING PURPOSES. IT CHECKS THE FREE LIST
27282  * AND REPORTS THE NUMBER OF FREE RECORDS.
 * THIS CAN BE DONE TO ENSURE THAT NO RECORDS HAVE BEEN LOST
27284  * ************************************************************************> */
execMEMCHECKREQ(Signal * signal)27285 void Dblqh::execMEMCHECKREQ(Signal* signal)
27286 {
27287   Uint32* dataPtr = &signal->theData[0];
27288   jamEntry();
27289   BlockReference userblockref = signal->theData[0];
27290   Uint32 index = 0;
27291   for (Uint32 i = 0; i < 7; i++)
27292     dataPtr[i] = 0;
27293   addfragptr.i = cfirstfreeAddfragrec;
27294   while (addfragptr.i != RNIL) {
27295     ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
27296     addfragptr.i = addfragptr.p->nextAddfragrec;
27297     dataPtr[index]++;
27298   }//while
27299   index++;
27300   dataPtr[index]= 0;
27301   index++;
27302   dataPtr[index]= 0;
27303   index++;
27304   for (tabptr.i = 0;
27305        tabptr.i < ctabrecFileSize;
27306        tabptr.i++) {
27307     ptrAss(tabptr, tablerec);
27308     if (tabptr.p->tableStatus == Tablerec::NOT_DEFINED) {
27309       dataPtr[index]++;
27310     }//if
27311   }//for
27312   index++;
27313   dataPtr[index] = ctcNumFree;
27314   sendSignal(userblockref, GSN_MEMCHECKCONF, signal, 10, JBB);
27315   return;
27316 }//Dblqh::execMEMCHECKREQ()
27317 
27318 #endif
27319 
27320 /* ************************************************************************* */
27321 /* ************************* STATEMENT BLOCKS ****************************** */
27322 /* ************************************************************************* */
27323 /* ========================================================================= */
27324 /* ====== BUILD LINKED LIST OF LOG PAGES AFTER RECEIVING FSREADCONF  ======= */
27325 /*                                                                           */
27326 /* ========================================================================= */
buildLinkedLogPageList(Signal * signal)27327 void Dblqh::buildLinkedLogPageList(Signal* signal)
27328 {
27329   LogPageRecordPtr bllLogPagePtr;
27330 
27331   arrGuard(lfoPtr.p->noPagesRw - 1, 16);
27332   arrGuard(lfoPtr.p->noPagesRw, 16);
27333   Uint32 prev = RNIL;
27334   for (UintR tbllIndex = 0; tbllIndex < lfoPtr.p->noPagesRw; tbllIndex++) {
27335     jam();
27336     /* ----------------------------------------------------------------------
27337      *  BUILD LINKED LIST BUT ALSO ENSURE THAT PAGE IS NOT SEEN AS DIRTY
27338      *  INITIALLY.
27339      * --------------------------------------------------------------------- */
27340     bllLogPagePtr.i = lfoPtr.p->logPageArray[tbllIndex];
27341     ptrCheckGuard(bllLogPagePtr, clogPageFileSize, logPageRecord);
27342 
27343 // #if VM_TRACE
27344 //     // Check logPage checksum before modifying it
27345 //     Uint32 calcCheckSum = calcPageCheckSum(bllLogPagePtr);
27346 //     Uint32 checkSum = bllLogPagePtr.p->logPageWord[ZPOS_CHECKSUM];
27347 //     if (checkSum != calcCheckSum) {
27348 //       ndbout << "Redolog: Checksum failure." << endl;
27349 //       progError(__LINE__, NDBD_EXIT_NDBREQUIRE, "Redolog: Checksum failure.");
27350 //     }
27351 // #endif
27352 
27353     bllLogPagePtr.p->logPageWord[ZPREV_PAGE] = prev;
27354     bllLogPagePtr.p->logPageWord[ZNEXT_PAGE] =
27355       lfoPtr.p->logPageArray[tbllIndex + 1];
27356     bllLogPagePtr.p->logPageWord[ZPOS_DIRTY] = ZNOT_DIRTY;
27357     prev = bllLogPagePtr.i;
27358   }//for
27359   bllLogPagePtr.i = lfoPtr.p->logPageArray[lfoPtr.p->noPagesRw - 1];
27360   ptrCheckGuard(bllLogPagePtr, clogPageFileSize, logPageRecord);
27361   bllLogPagePtr.p->logPageWord[ZNEXT_PAGE] = RNIL;
27362 }//Dblqh::buildLinkedLogPageList()
27363 
27364 /* =========================================================================
27365  * =======                      CHANGE TO NEXT MBYTE IN LOG           =======
27366  *
27367  * ========================================================================= */
changeMbyte(Signal * signal)27368 void Dblqh::changeMbyte(Signal* signal)
27369 {
27370   writeNextLog(signal);
27371   writeFileDescriptor(signal);
27372 }//Dblqh::changeMbyte()
27373 
27374 /* ========================================================================= */
27375 /* ======       CHECK IF THIS COMMIT LOG RECORD IS TO BE EXECUTED    ======= */
27376 /*                                                                           */
27377 /*      SUBROUTINE SHORT NAME = CEL                                          */
27378 /* ========================================================================= */
checkIfExecLog(Signal * signal,const TcConnectionrecPtr tcConnectptr)27379 Uint32 Dblqh::checkIfExecLog(Signal* signal,
27380                              const TcConnectionrecPtr tcConnectptr)
27381 {
27382   tabptr.i = tcConnectptr.p->tableref;
27383   ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
27384   if (getFragmentrec(signal, tcConnectptr.p->fragmentid) &&
27385       (table_version_major(tabptr.p->schemaVersion) ==
27386        table_version_major(tcConnectptr.p->schemaVersion))) {
27387     if (fragptr.p->execSrStatus != Fragrecord::IDLE) {
27388       if (fragptr.p->execSrNoReplicas > logPartPtr.p->execSrExecuteIndex) {
27389         ndbrequire((fragptr.p->execSrNoReplicas - 1) < MAX_REPLICAS);
27390         for (Uint32 i = logPartPtr.p->execSrExecuteIndex;
27391 	     i < fragptr.p->execSrNoReplicas;
27392 	     i++) {
27393           jam();
27394           if (tcConnectptr.p->gci_hi >= fragptr.p->execSrStartGci[i]) {
27395             if (tcConnectptr.p->gci_hi <= fragptr.p->execSrLastGci[i]) {
27396               jam();
27397               logPartPtr.p->execSrExecuteIndex = i;
27398               return ZOK;
27399             }//if
27400           }//if
27401         }//for
27402       }//if
27403     }//if
27404   }//if
27405   return ZNOT_OK;
27406 }//Dblqh::checkIfExecLog()
27407 
27408 /* ========================================================================= */
27409 /* == CHECK IF THERE IS LESS THAN 192 KBYTE IN THE BUFFER PLUS INCOMING  === */
27410 /*      READS ALREADY STARTED. IF SO IS THE CASE THEN START ANOTHER READ IF  */
27411 /*      THERE ARE MORE PAGES IN THIS MBYTE.                                  */
27412 /*                                                                           */
27413 /* ========================================================================= */
checkReadExecSr(Signal * signal)27414 void Dblqh::checkReadExecSr(Signal* signal)
27415 {
27416   logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG;
27417   logPartPtr.p->execSrPagesRead = logPartPtr.p->execSrPagesRead + 8;
27418   logPartPtr.p->execSrPagesReading = logPartPtr.p->execSrPagesReading - 8;
27419   if ((logPartPtr.p->execSrPagesRead + logPartPtr.p->execSrPagesReading) <
27420       ZREAD_AHEAD_SIZE) {
27421     jam();
27422     /* ----------------------------------------------------------------------
27423      *  WE HAVE LESS THAN 64 KBYTE OF LOG PAGES REMAINING IN MEMORY OR ON
27424      *  ITS WAY TO MAIN MEMORY. READ IN 8 MORE PAGES.
27425      * --------------------------------------------------------------------- */
27426     if ((logPartPtr.p->execSrPagesRead + logPartPtr.p->execSrPagesExecuted) <
27427 	ZPAGES_IN_MBYTE) {
27428       jam();
27429       /* --------------------------------------------------------------------
27430        *  THERE ARE MORE PAGES TO READ IN THIS MBYTE. READ THOSE FIRST
27431        *  IF >= ZPAGES_IN_MBYTE THEN THERE ARE NO MORE PAGES TO READ. THUS
27432        *  WE PROCEED WITH EXECUTION OF THE LOG.
27433        * ------------------------------------------------------------------- */
27434       readExecSr(signal);
27435       logPartPtr.p->logExecState = LogPartRecord::LES_WAIT_READ_EXEC_SR;
27436     }//if
27437   }//if
27438 }//Dblqh::checkReadExecSr()
27439 
27440 /* ========================================================================= */
27441 /* ==== CHECK IF START OF NEW FRAGMENT IS COMPLETED AND WE CAN       ======= */
27442 /* ==== GET THE START GCI                                            ======= */
27443 /*                                                                           */
27444 /*      SUBROUTINE SHORT NAME = CTC                                          */
27445 /* ========================================================================= */
checkScanTcCompleted(Signal * signal,const TcConnectionrecPtr tcConnectptr)27446 void Dblqh::checkScanTcCompleted(Signal* signal,
27447                                  const TcConnectionrecPtr tcConnectptr)
27448 {
27449   tcConnectptr.p->logWriteState = TcConnectionrec::NOT_STARTED;
27450   fragptr.i = tcConnectptr.p->fragmentptr;
27451   c_fragment_pool.getPtr(fragptr);
27452   fragptr.p->activeTcCounter = fragptr.p->activeTcCounter - 1;
27453   if (fragptr.p->activeTcCounter == 0) {
27454     jam();
27455     fragptr.p->startGci = cnewestGci + 1;
27456     tabptr.i = tcConnectptr.p->tableref;
27457     ptrCheckGuard(tabptr, ctabrecFileSize, tablerec);
27458     sendCopyActiveConf(signal, tcConnectptr.p->tableref);
27459   }//if
27460 }//Dblqh::checkScanTcCompleted()
27461 
27462 /* ------------------------------------------------------------------------- */
27463 /* ------       CLOSE A FILE DURING EXECUTION OF FRAGMENT LOG        ------- */
27464 /*                                                                           */
27465 /* ------------------------------------------------------------------------- */
closeFile(Signal * signal,LogFileRecordPtr clfLogFilePtr,Uint32 line)27466 void Dblqh::closeFile(Signal* signal,
27467 		      LogFileRecordPtr clfLogFilePtr, Uint32 line)
27468 {
27469   signal->theData[0] = clfLogFilePtr.p->fileRef;
27470   signal->theData[1] = cownref;
27471   signal->theData[2] = clfLogFilePtr.i;
27472   signal->theData[3] = ZCLOSE_NO_DELETE;
27473   signal->theData[4] = line;
27474   sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, 5, JBA);
27475 }//Dblqh::closeFile()
27476 
27477 
27478 /* ---------------------------------------------------------------- */
/* ---------------- A LOG PAGE HAS BEEN COMPLETED ----------------- */
27480 /*                                                                  */
27481 /*       SUBROUTINE SHORT NAME = CLP                                */
27482 // Input Pointers:
27483 // logFilePtr
27484 // logPagePtr
27485 // logPartPtr
27486 // Defines lfoPtr
27487 /* ---------------------------------------------------------------- */
completedLogPage(Signal * signal,Uint32 clpType,Uint32 place,bool sync_flag)27488 void Dblqh::completedLogPage(Signal* signal,
27489                              Uint32 clpType,
27490                              Uint32 place,
27491                              bool sync_flag)
27492 {
27493   LogPageRecordPtr clpLogPagePtr;
27494   LogPageRecordPtr wlpLogPagePtr;
27495   UintR twlpNoPages;
27496   UintR twlpType;
27497 
27498   if (logFilePtr.p->firstFilledPage == RNIL) {
27499     jam();
27500     logFilePtr.p->firstFilledPage = logPagePtr.i;
27501   } else {
27502     jam();
27503     clpLogPagePtr.i = logFilePtr.p->lastFilledPage;
27504     ptrCheckGuard(clpLogPagePtr, clogPageFileSize, logPageRecord);
27505     clpLogPagePtr.p->logPageWord[ZNEXT_PAGE] = logPagePtr.i;
27506   }//if
27507   logFilePtr.p->lastFilledPage = logPagePtr.i;
27508   logPagePtr.p->logPageWord[ZNEXT_PAGE] = RNIL;
27509   logFilePtr.p->noLogpagesInBuffer = logFilePtr.p->noLogpagesInBuffer + 1;
27510   if (logFilePtr.p->noLogpagesInBuffer != ZMAX_PAGES_WRITTEN) {
27511     if (clpType != ZLAST_WRITE_IN_FILE) {
27512       if (clpType != ZENFORCE_WRITE) {
27513         jam();
27514         return;
27515       }//if
27516     }//if
27517   }//if
27518   twlpType = clpType;
27519 /* ------------------------------------------------------------------------- */
27520 /* ------               WRITE A SET OF LOG PAGES TO DISK             ------- */
27521 /*                                                                           */
27522 /*      SUBROUTINE SHORT NAME: WLP                                           */
27523 /* ------------------------------------------------------------------------- */
27524   seizeLfo(signal);
27525   initLfo(signal);
27526   Uint32* dataPtr = &signal->theData[6];
27527   twlpNoPages = 0;
27528   wlpLogPagePtr.i = logFilePtr.p->firstFilledPage;
27529   do {
27530     dataPtr[twlpNoPages] = wlpLogPagePtr.i;
27531     twlpNoPages++;
27532     ptrCheckGuard(wlpLogPagePtr, clogPageFileSize, logPageRecord);
27533 
27534     writeDbgInfoPageHeader(wlpLogPagePtr, place,
27535                            logFilePtr.p->filePosition + twlpNoPages - 1,
27536                            ZPAGE_SIZE);
27537     // Calculate checksum for page
27538     wlpLogPagePtr.p->logPageWord[ZPOS_CHECKSUM] = calcPageCheckSum(wlpLogPagePtr);
27539     wlpLogPagePtr.i = wlpLogPagePtr.p->logPageWord[ZNEXT_PAGE];
27540   } while (wlpLogPagePtr.i != RNIL);
27541   ndbrequire(twlpNoPages < 9);
27542   dataPtr[twlpNoPages] = logFilePtr.p->filePosition;
27543 /* -------------------------------------------------- */
27544 /*       SET TIMER ON THIS LOG PART TO SIGNIFY THAT A */
27545 /*       LOG RECORD HAS BEEN SENT AT THIS TIME.       */
27546 /* -------------------------------------------------- */
27547   logPartPtr.p->logPartTimer = logPartPtr.p->logTimer;
27548   signal->theData[0] = logFilePtr.p->fileRef;
27549   signal->theData[1] = cownref;
27550   signal->theData[2] = lfoPtr.i;
27551   if (twlpType == ZLAST_WRITE_IN_FILE || sync_flag) {
27552     jam();
27553     signal->theData[3] = ZLIST_OF_MEM_PAGES_SYNCH;
27554   } else {
27555     jam();
27556     signal->theData[3] = ZLIST_OF_MEM_PAGES;
27557   }//if
27558   signal->theData[4] = ZVAR_NO_LOG_PAGE_WORD;
27559   signal->theData[5] = twlpNoPages;
27560   sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 15, JBA);
27561 
27562   ndbrequire(logFilePtr.p->fileRef != RNIL);
27563 
27564   logPartPtr.p->m_io_tracker.send_io(32768*twlpNoPages);
27565 
27566   if (DEBUG_REDO)
27567   {
27568     ndbout_c("writing %d pages at part: %u file: %u page: %u (mb: %u)",
27569              twlpNoPages,
27570              logPartPtr.p->logPartNo,
27571              logFilePtr.p->fileNo,
27572              logFilePtr.p->filePosition,
27573              logFilePtr.p->filePosition >> ZTWOLOG_NO_PAGES_IN_MBYTE);
27574   }
27575 
27576   if (twlpType == ZNORMAL) {
27577     jam();
27578     lfoPtr.p->lfoState = LogFileOperationRecord::ACTIVE_WRITE_LOG;
27579   } else if (twlpType == ZLAST_WRITE_IN_FILE) {
27580     jam();
27581     lfoPtr.p->lfoState = LogFileOperationRecord::LAST_WRITE_IN_FILE;
27582   } else {
27583     ndbrequire(twlpType == ZENFORCE_WRITE);
27584     jam();
27585     lfoPtr.p->lfoState = LogFileOperationRecord::ACTIVE_WRITE_LOG;
27586   }//if
27587   /* ----------------------------------------------------------------------- */
27588   /* ------       MOVE PAGES FROM LOG FILE TO LFO RECORD             ------- */
27589   /*                                                                         */
27590   /* ----------------------------------------------------------------------- */
27591   /* -------------------------------------------------- */
27592   /*       MOVE PAGES TO LFO RECORD AND REMOVE THEM     */
27593   /*       FROM LOG FILE RECORD.                        */
27594   /* -------------------------------------------------- */
27595   lfoPtr.p->firstLfoPage = logFilePtr.p->firstFilledPage;
27596   logFilePtr.p->firstFilledPage = RNIL;
27597   logFilePtr.p->lastFilledPage = RNIL;
27598   logFilePtr.p->noLogpagesInBuffer = 0;
27599 
27600   lfoPtr.p->noPagesRw = twlpNoPages;
27601   lfoPtr.p->lfoPageNo = logFilePtr.p->filePosition;
27602   lfoPtr.p->lfoWordWritten = ZPAGE_SIZE - 1;
27603   logFilePtr.p->filePosition += twlpNoPages;
27604 }//Dblqh::completedLogPage()
27605 
27606 /* ---------------------------------------------------------------- */
27607 /* ---------------- DELETE FRAGMENT RECORD ------------------------ */
27608 /*                                                                  */
27609 /*       SUBROUTINE SHORT NAME = DFR                                */
27610 /* ---------------------------------------------------------------- */
deleteFragrec(Uint32 fragId)27611 void Dblqh::deleteFragrec(Uint32 fragId)
27612 {
27613   Uint32 indexFound= RNIL;
27614   fragptr.i = RNIL;
27615   for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tabptr.p->fragid); i++) {
27616     jam();
27617     if (tabptr.p->fragid[i] == fragId) {
27618       fragptr.i = tabptr.p->fragrec[i];
27619       indexFound = i;
27620       break;
27621     }//if
27622   }//for
27623   if (fragptr.i != RNIL) {
27624     jam();
27625     c_fragment_pool.getPtr(fragptr);
27626     if (fragptr.p->lcp_frag_ord_state == Fragrecord::LCP_QUEUED)
27627     {
27628       jam();
27629       /**
27630        * We delete the fragment even before it has been checkpointed
27631        * its first time. Need to remove it from LCP queue.
27632        */
27633       ndbrequire(fragptr.p->m_create_table_flag_lcp_frag_ord);
27634       c_queued_lcp_frag_ord.remove(fragptr);
27635       fragptr.p->m_create_table_flag_lcp_frag_ord = false;
27636       fragptr.p->m_create_table_insert_lcp = false;
27637       fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_EXECUTED;
27638     }
27639     ndbrequire(fragptr.p->lcp_frag_ord_state == Fragrecord::LCP_EXECUTED ||
27640       fragptr.p->lcp_frag_ord_state ==
27641         Fragrecord::LCP_EXECUTED_BY_CREATE_TABLE);
27642     fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_EXECUTED;
27643     ndbrequire(fragptr.p->fragStatus != Fragrecord::FREE);
27644     tabptr.p->fragid[indexFound] = ZNIL;
27645     tabptr.p->fragrec[indexFound] = RNIL;
27646     fragptr.p->fragStatus = Fragrecord::FREE;
27647     c_fragment_pool.release(fragptr);
27648   }
27649 }//Dblqh::deleteFragrec()
27650 
27651 /* ------------------------------------------------------------------------- */
27652 /* -------          FIND LOG FILE RECORD GIVEN FILE NUMBER           ------- */
27653 /*                                                                           */
27654 /*       INPUT:          TFLF_FILE_NO    THE FILE NUMBER                     */
27655 /*                       FLF_LOG_PART_PTR THE LOG PART RECORD                */
27656 /*       OUTPUT:         FLF_LOG_FILE_PTR THE FOUND LOG FILE RECORD          */
27657 /*       SUBROUTINE SHORT NAME = FLF                                         */
27658 /* ------------------------------------------------------------------------- */
findLogfile(Signal * signal,Uint32 fileNo,LogPartRecordPtr flfLogPartPtr,LogFileRecordPtr * parLogFilePtr)27659 void Dblqh::findLogfile(Signal* signal,
27660                         Uint32 fileNo,
27661                         LogPartRecordPtr flfLogPartPtr,
27662                         LogFileRecordPtr* parLogFilePtr)
27663 {
27664   LogFileRecordPtr locLogFilePtr;
27665   locLogFilePtr.i = flfLogPartPtr.p->firstLogfile;
27666   Uint32 loopCount = 0;
27667   while (true) {
27668     ptrCheckGuard(locLogFilePtr, clogFileFileSize, logFileRecord);
27669     if (locLogFilePtr.p->fileNo == fileNo) {
27670       jam();
27671       ndbrequire(loopCount == fileNo);
27672       parLogFilePtr->i = locLogFilePtr.i;
27673       parLogFilePtr->p = locLogFilePtr.p;
27674       return;
27675     }//if
27676     locLogFilePtr.i = locLogFilePtr.p->nextLogFile;
27677     loopCount++;
27678     if (loopCount >= flfLogPartPtr.p->noLogFiles &&
27679 	getNodeState().startLevel != NodeState::SL_STARTED)
27680     {
27681       goto error;
27682     }
27683     ndbrequire(loopCount < flfLogPartPtr.p->noLogFiles);
27684   }//while
27685 
27686 error:
27687   char buf[255];
27688   BaseString::snprintf(buf, sizeof(buf),
27689 		       "Unable to restart, failed while reading redo."
27690 		       " Likely invalid change of configuration");
27691   progError(__LINE__,
27692 	    NDBD_EXIT_INVALID_CONFIG,
27693 	    buf);
27694 }//Dblqh::findLogfile()
27695 
27696 /* ------------------------------------------------------------------------- */
27697 /* ------     FIND PAGE REFERENCE IN MEMORY BUFFER AT LOG EXECUTION  ------- */
27698 /*                                                                           */
27699 /* ------------------------------------------------------------------------- */
findPageRef(Signal * signal,CommitLogRecord * commitLogRecord)27700 void Dblqh::findPageRef(Signal* signal, CommitLogRecord* commitLogRecord)
27701 {
27702   UintR tfprIndex;
27703 
27704   logPagePtr.i = RNIL;
27705   if (ERROR_INSERTED(5020)) {
27706     // Force system to read page from disk
27707     return;
27708   }
27709   pageRefPtr.i = logPartPtr.p->lastPageRef;
27710   do {
27711     ptrCheckGuard(pageRefPtr, cpageRefFileSize, pageRefRecord);
27712     if (commitLogRecord->fileNo == pageRefPtr.p->prFileNo) {
27713       if (commitLogRecord->startPageNo >= pageRefPtr.p->prPageNo) {
27714         if (commitLogRecord->startPageNo < (Uint16) (pageRefPtr.p->prPageNo + 8)) {
27715           jam();
27716           tfprIndex = commitLogRecord->startPageNo - pageRefPtr.p->prPageNo;
27717           logPagePtr.i = pageRefPtr.p->pageRef[tfprIndex];
27718           ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
27719           return;
27720         }//if
27721       }//if
27722     }//if
27723     pageRefPtr.i = pageRefPtr.p->prPrev;
27724   } while (pageRefPtr.i != RNIL);
27725 
27726 #ifndef NO_REDO_PAGE_CACHE
27727   RedoPageCache& cache = m_redo_page_cache;
27728   RedoCacheLogPageRecord key;
27729   key.m_part_no = logPartPtr.p->logPartNo;
27730   key.m_file_no = commitLogRecord->fileNo;
27731   key.m_page_no = commitLogRecord->startPageNo;
27732   Ptr<RedoCacheLogPageRecord> pagePtr;
27733   if (cache.m_hash.find(pagePtr, key))
27734   {
27735     jam();
27736     if (cache.m_lru.hasPrev(pagePtr))
27737     {
27738       jam();
27739       cache.m_lru.remove(pagePtr);
27740       cache.m_lru.addFirst(pagePtr);
27741     }
27742     logPagePtr.i = pagePtr.i;
27743     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
27744 
27745     Ptr<LogPageRecord> loopPtr = logPagePtr;
27746     Uint32 extra = commitLogRecord->stopPageNo - commitLogRecord->startPageNo;
27747     for (Uint32 i = 0; i<extra; i++)
27748     {
27749       jam();
27750       Uint32 prevPtrI = loopPtr.i;
27751       loopPtr.i = loopPtr.p->logPageWord[ZNEXT_PAGE];
27752       if (loopPtr.i == RNIL)
27753       {
27754         jam();
27755         /**
27756          * next page is not linked
27757          *   check if it's added as a "single" page
27758          */
27759         key.m_page_no = commitLogRecord->startPageNo + i + 1;
27760         if (cache.m_hash.find(pagePtr, key))
27761         {
27762           jam();
27763           /**
27764            * Yes it is...link them
27765            */
27766           Ptr<LogPageRecord> tmp;
27767           tmp.i = pagePtr.i;
27768           tmp.p = reinterpret_cast<LogPageRecord*>(pagePtr.p);
27769           tmp.p->logPageWord[ZPREV_PAGE] = prevPtrI;
27770           loopPtr.p->logPageWord[ZNEXT_PAGE] = tmp.i;
27771           loopPtr.i = tmp.i;
27772         }
27773         else
27774         {
27775           jam();
27776           logPagePtr.i = RNIL;
27777           cache.m_multi_miss++;
27778           if (0)
27779           ndbout_c("Found part: %u file: %u page: %u but not next page(%u) %u",
27780                    key.m_part_no,
27781                    commitLogRecord->fileNo,
27782                    commitLogRecord->startPageNo,
27783                    (i + 1),
27784                    commitLogRecord->startPageNo + i + 1);
27785           return;
27786         }
27787       }
27788 
27789       ptrCheckGuard(loopPtr, clogPageFileSize, logPageRecord);
27790       pagePtr.i = loopPtr.i;
27791       pagePtr.p = reinterpret_cast<RedoCacheLogPageRecord*>(loopPtr.p);
27792       if (cache.m_lru.hasPrev(pagePtr))
27793       {
27794         jam();
27795         cache.m_lru.remove(pagePtr);
27796         cache.m_lru.addFirst(pagePtr);
27797       }
27798     }
27799     cache.m_hits++;
27800     if (extra)
27801     {
27802       jam();
27803       cache.m_multi_page++;
27804     }
27805   }
27806 #endif
27807 }//Dblqh::findPageRef()
27808 
27809 /* ------------------------------------------------------------------------- */
27810 /* ------         GET FIRST OPERATION QUEUED FOR LOGGING             ------- */
27811 /*                                                                           */
27812 /*      SUBROUTINE SHORT NAME = GFL                                          */
27813 /* ------------------------------------------------------------------------- */
27814 void
getFirstInLogQueue(Signal * signal,Ptr<TcConnectionrec> & dst)27815 Dblqh::getFirstInLogQueue(Signal* signal,
27816                           Ptr<TcConnectionrec> & dst)
27817 {
27818   TcConnectionrecPtr tmp;
27819 /* -------------------------------------------------- */
27820 /*       GET THE FIRST FROM THE LOG QUEUE AND REMOVE  */
27821 /*       IT FROM THE QUEUE.                           */
27822 /* -------------------------------------------------- */
27823   LogPartRecord::OperationQueue * queue = &logPartPtr.p->m_log_complete_queue;
27824   tmp.i = queue->firstElement;
27825   if (tmp.i == RNIL)
27826   {
27827     jam();
27828     queue = &logPartPtr.p->m_log_prepare_queue;
27829     tmp.i = queue->firstElement;
27830   }
27831   ndbrequire(tcConnect_pool.getValidPtr(tmp));
27832   queue->firstElement = tmp.p->nextTcLogQueue;
27833   if (queue->firstElement == RNIL) {
27834     jam();
27835     queue->lastElement = RNIL;
27836   }
27837   else
27838   {
27839     /**
27840      * Also needs to set the prev pointer of the new first
27841      * element to RNIL to indicate it is now first in the
27842      * list.
27843      */
27844     TcConnectionrecPtr new_first;
27845     new_first.i = tmp.p->nextTcLogQueue;
27846     ndbrequire(tcConnect_pool.getValidPtr(new_first));
27847     new_first.p->prevTcLogQueue = RNIL;
27848   }
27849   tmp.p->nextTcLogQueue = RNIL;
27850   tmp.p->prevTcLogQueue = RNIL;
27851   dst = tmp;
27852 }//Dblqh::getFirstInLogQueue()
27853 
27854 void
remove_from_prepare_log_queue(Signal * signal,TcConnectionrecPtr tcPtr)27855 Dblqh::remove_from_prepare_log_queue(Signal *signal,
27856                                      TcConnectionrecPtr tcPtr)
27857 {
27858   TcConnectionrecPtr tmp;
27859   LogPartRecordPtr regLogPartPtr;
27860   regLogPartPtr.i = tcPtr.p->m_log_part_ptr_i;
27861   ptrCheckGuard(regLogPartPtr, clogPartFileSize, logPartRecord);
27862   LogPartRecord::OperationQueue *queue = &regLogPartPtr.p->m_log_prepare_queue;
27863 
27864   if (tcPtr.p->prevTcLogQueue == RNIL)
27865   {
27866     jam();
27867     ndbrequire(queue->firstElement == tcPtr.i);
27868     queue->firstElement = tcPtr.p->nextTcLogQueue;
27869   }
27870   else
27871   {
27872     jam();
27873     tmp.i = tcPtr.p->prevTcLogQueue;
27874     ndbrequire(tcConnect_pool.getValidPtr(tmp));
27875     tmp.p->nextTcLogQueue = tcPtr.p->nextTcLogQueue;
27876   }
27877 
27878   if (tcPtr.p->nextTcLogQueue == RNIL)
27879   {
27880     jam();
27881     ndbrequire(queue->lastElement == tcPtr.i);
27882     queue->lastElement = tcPtr.p->prevTcLogQueue;
27883   }
27884   else
27885   {
27886     jam();
27887     tmp.i = tcPtr.p->nextTcLogQueue;
27888     ndbrequire(tcConnect_pool.getValidPtr(tmp));
27889     tmp.p->prevTcLogQueue = tcPtr.p->prevTcLogQueue;
27890   }
27891 
27892   tcPtr.p->prevTcLogQueue = RNIL;
27893   tcPtr.p->nextTcLogQueue = RNIL;
27894 }
27895 
27896 /* ---------------------------------------------------------------- */
27897 /* ---------------- GET FRAGMENT RECORD --------------------------- */
27898 /*       INPUT:          TFRAGID         FRAGMENT ID LOOKING FOR    */
27899 /*                       TABPTR          TABLE ID                   */
27900 /*       SUBROUTINE SHORT NAME = GFR                                */
27901 /* ---------------------------------------------------------------- */
getFragmentrec(Signal * signal,Uint32 fragId)27902 bool Dblqh::getFragmentrec(Signal* signal, Uint32 fragId)
27903 {
27904   for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tabptr.p->fragid); i++) {
27905     jamDebug();
27906     if (tabptr.p->fragid[i] == fragId) {
27907       fragptr.i = tabptr.p->fragrec[i];
27908       c_fragment_pool.getPtr(fragptr);
27909       return true;
27910     }//if
27911   }//for
27912   D("getFragmentrec failed to find fragId: " << fragId <<
27913     " in table: " << tabptr.i);
27914   return false;
27915 }//Dblqh::getFragmentrec()
27916 
27917 /* ========================================================================= */
/* ======                    INITIATE ADD FRAGMENT RECORD            ======= */
27919 /*                                                                           */
27920 /* ========================================================================= */
initialiseAddfragrec(Signal * signal)27921 void Dblqh::initialiseAddfragrec(Signal* signal)
27922 {
27923   if (caddfragrecFileSize != 0) {
27924     for (addfragptr.i = 0; addfragptr.i < caddfragrecFileSize; addfragptr.i++) {
27925       ptrAss(addfragptr, addFragRecord);
27926       addfragptr.p->addfragStatus = AddFragRecord::FREE;
27927       addfragptr.p->nextAddfragrec = addfragptr.i + 1;
27928     }//for
27929     addfragptr.i = caddfragrecFileSize - 1;
27930     ptrAss(addfragptr, addFragRecord);
27931     addfragptr.p->nextAddfragrec = RNIL;
27932     cfirstfreeAddfragrec = 0;
27933   } else {
27934     jam();
27935     cfirstfreeAddfragrec = RNIL;
27936   }//if
27937 }//Dblqh::initialiseAddfragrec()
27938 
27939 /* ========================================================================= */
27940 /* ======                INITIATE FRAGMENT RECORD                    ======= */
27941 /*                                                                           */
27942 /* ========================================================================= */
initialiseFragrec(Signal * signal)27943 void Dblqh::initialiseFragrec(Signal* signal)
27944 {
27945 
27946   Fragrecord_list tmp(c_fragment_pool);
27947   while (tmp.seizeFirst(fragptr))
27948   {
27949     refresh_watch_dog();
27950     new (fragptr.p) Fragrecord();
27951     fragptr.p->fragStatus = Fragrecord::FREE;
27952     fragptr.p->execSrStatus = Fragrecord::IDLE;
27953     fragptr.p->srStatus = Fragrecord::SS_IDLE;
27954   }
27955   while (tmp.releaseFirst());
27956 }//Dblqh::initialiseFragrec()
27957 
27958 /* ========================================================================= */
/* ======                INITIATE GCP RECORD                         ======= */
27960 /*                                                                           */
27961 /* ========================================================================= */
initialiseGcprec(Signal * signal)27962 void Dblqh::initialiseGcprec(Signal* signal)
27963 {
27964   UintR tigpIndex;
27965 
27966   if (cgcprecFileSize != 0) {
27967     for (gcpPtr.i = 0; gcpPtr.i < cgcprecFileSize; gcpPtr.i++) {
27968       ptrAss(gcpPtr, gcpRecord);
27969       for (tigpIndex = 0; tigpIndex < NDB_MAX_LOG_PARTS; tigpIndex++) {
27970         gcpPtr.p->gcpLogPartState[tigpIndex] = ZIDLE;
27971         gcpPtr.p->gcpSyncReady[tigpIndex] = ZFALSE;
27972       }//for
27973     }//for
27974   }//if
27975 }//Dblqh::initialiseGcprec()
27976 
27977 /* ========================================================================= */
27978 /* ======                INITIATE LCP RECORD                         ======= */
27979 /*                                                                           */
27980 /* ========================================================================= */
initialiseLcpRec(Signal * signal)27981 void Dblqh::initialiseLcpRec(Signal* signal)
27982 {
27983   if (clcpFileSize != 0) {
27984     for (lcpPtr.i = 0; lcpPtr.i < clcpFileSize; lcpPtr.i++) {
27985       ptrAss(lcpPtr, lcpRecord);
27986       lcpPtr.p->currentPrepareFragment.fragPtrI = RNIL;
27987       lcpPtr.p->currentPrepareFragment.lcpFragOrd.fragmentId = Uint32(~0);
27988       lcpPtr.p->currentPrepareFragment.lcpFragOrd.tableId = Uint32(~0);
27989       lcpPtr.p->currentRunFragment.fragPtrI = RNIL;
27990       lcpPtr.p->currentRunFragment.lcpFragOrd.fragmentId = Uint32(~0);
27991       lcpPtr.p->currentRunFragment.lcpFragOrd.tableId = Uint32(~0);
27992       lcpPtr.p->m_outstanding = 0;
27993       lcpPtr.p->m_no_of_records = 0;
27994       lcpPtr.p->m_no_of_bytes = 0;
27995       lcpPtr.p->lcpPrepareState = LcpRecord::LCP_IDLE;
27996       lcpPtr.p->lcpRunState = LcpRecord::LCP_IDLE;
27997       lcpPtr.p->firstFragmentFlag = false;
27998       lcpPtr.p->lastFragmentFlag = false;
27999       lcpPtr.p->m_early_lcps_need_synch = false;
28000       lcpPtr.p->m_wait_early_lcp_synch = false;
28001     }//for
28002   }//if
28003 }//Dblqh::initialiseLcpRec()
28004 
28005 /* ========================================================================= */
28006 /* ======         INITIATE LOG FILE OPERATION RECORD                 ======= */
28007 /*                                                                           */
28008 /* ========================================================================= */
initialiseLfo(Signal * signal)28009 void Dblqh::initialiseLfo(Signal* signal)
28010 {
28011   if (clfoFileSize != 0) {
28012     for (lfoPtr.i = 0; lfoPtr.i < clfoFileSize; lfoPtr.i++) {
28013       ptrAss(lfoPtr, logFileOperationRecord);
28014       lfoPtr.p->lfoState = LogFileOperationRecord::IDLE;
28015       lfoPtr.p->lfoTimer = 0;
28016       lfoPtr.p->nextLfo = lfoPtr.i + 1;
28017     }//for
28018     lfoPtr.i = clfoFileSize - 1;
28019     ptrAss(lfoPtr, logFileOperationRecord);
28020     lfoPtr.p->nextLfo = RNIL;
28021     cfirstfreeLfo = 0;
28022   } else {
28023     jam();
28024     cfirstfreeLfo = RNIL;
28025   }//if
28026 }//Dblqh::initialiseLfo()
28027 
28028 /* ========================================================================= */
28029 /* ======                 INITIATE LOG FILE RECORD                   ======= */
28030 /*                                                                           */
28031 /* ========================================================================= */
initialiseLogFile(Signal * signal)28032 void Dblqh::initialiseLogFile(Signal* signal)
28033 {
28034   if (clogFileFileSize != 0) {
28035     for (logFilePtr.i = 0; logFilePtr.i < clogFileFileSize; logFilePtr.i++) {
28036       ptrAss(logFilePtr, logFileRecord);
28037       logFilePtr.p->nextLogFile = logFilePtr.i + 1;
28038       logFilePtr.p->logFileStatus = LogFileRecord::LFS_IDLE;
28039 
28040       logFilePtr.p->logLastPrepRef = new Uint32[clogFileSize];
28041       logFilePtr.p->logMaxGciCompleted = new Uint32[clogFileSize];
28042       logFilePtr.p->logMaxGciStarted = new Uint32[clogFileSize];
28043 
28044       if (logFilePtr.p->logLastPrepRef == 0 ||
28045           logFilePtr.p->logMaxGciCompleted == 0 ||
28046           logFilePtr.p->logMaxGciStarted == 0)
28047       {
28048         char buf[256];
28049         BaseString::snprintf(buf, sizeof(buf),
28050                              "Failed to alloc mbyte(%u) arrays for logfile %u",
28051                              clogFileSize, logFilePtr.i);
28052         progError(__LINE__, NDBD_EXIT_MEMALLOC, buf);
28053       }
28054 
28055     }//for
28056     logFilePtr.i = clogFileFileSize - 1;
28057     ptrAss(logFilePtr, logFileRecord);
28058     logFilePtr.p->nextLogFile = RNIL;
28059     cfirstfreeLogFile = 0;
28060   } else {
28061     jam();
28062     cfirstfreeLogFile = RNIL;
28063   }//if
28064 }//Dblqh::initialiseLogFile()
28065 
28066 /* ========================================================================= */
28067 /* ======                  INITIATE LOG PAGES                        ======= */
28068 /*                                                                           */
28069 /* ========================================================================= */
initialiseLogPage(Signal * signal)28070 void Dblqh::initialiseLogPage(Signal* signal)
28071 {
28072   /**
28073    * Moved into initRecords()
28074    */
28075 }//Dblqh::initialiseLogPage()
28076 
28077 /* =========================================================================
28078  * ======                       INITIATE LOG PART RECORD             =======
28079  *
28080  * ========================================================================= */
initialiseLogPart(Signal * signal)28081 void Dblqh::initialiseLogPart(Signal* signal)
28082 {
28083   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++) {
28084     ptrAss(logPartPtr, logPartRecord);
28085     logPartPtr.p->waitWriteGciLog = LogPartRecord::WWGL_FALSE;
28086     logPartPtr.p->LogLqhKeyReqSent = ZFALSE;
28087     logPartPtr.p->logPartNewestCompletedGCI = (UintR)-1;
28088     logPartPtr.p->logTcConrec = RNIL;
28089   }//for
28090 }//Dblqh::initialiseLogPart()
28091 
initialisePageRef(Signal * signal)28092 void Dblqh::initialisePageRef(Signal* signal)
28093 {
28094   if (cpageRefFileSize != 0) {
28095     for (pageRefPtr.i = 0;
28096 	 pageRefPtr.i < cpageRefFileSize;
28097 	 pageRefPtr.i++) {
28098       ptrAss(pageRefPtr, pageRefRecord);
28099       pageRefPtr.p->prNext = pageRefPtr.i + 1;
28100     }//for
28101     pageRefPtr.i = cpageRefFileSize - 1;
28102     ptrAss(pageRefPtr, pageRefRecord);
28103     pageRefPtr.p->prNext = RNIL;
28104     cfirstfreePageRef = 0;
28105   } else {
28106     jam();
28107     cfirstfreePageRef = RNIL;
28108   }//if
28109 }//Dblqh::initialisePageRef()
28110 
28111 /* ==========================================================================
28112  * =======                        INITIATE RECORDS                    =======
28113  *
28114  *       TAKES CARE OF INITIATION OF ALL RECORDS IN THIS BLOCK.
28115  * ========================================================================= */
initialiseRecordsLab(Signal * signal,Uint32 data,Uint32 retRef,Uint32 retData)28116 void Dblqh::initialiseRecordsLab(Signal* signal, Uint32 data,
28117 				 Uint32 retRef, Uint32 retData)
28118 {
28119   Uint32 i;
28120   switch (data) {
28121   case 0:
28122     jam();
28123     m_sr_nodes.clear();
28124     m_sr_exec_sr_req.clear();
28125     m_sr_exec_sr_conf.clear();
28126     for (i = 0; i < 4; i++) {
28127       cactiveCopy[i] = RNIL;
28128     }//for
28129     cnoActiveCopy = 0;
28130     ccurrentGcprec = RNIL;
28131     caddNodeState = ZFALSE;
28132     cstartRecReq = SRR_INITIAL; // Initial
28133     cnewestGci = 0;
28134     cnewestCompletedGci = 0;
28135     crestartOldestGci = 0;
28136     crestartNewestGci = 0;
28137     csrPhaseStarted = ZSR_NO_PHASE_STARTED;
28138     csrPhasesCompleted = 0;
28139     cmasterDihBlockref = 0;
28140     cnoFragmentsExecSr = 0;
28141     cnoOutstandingExecFragReq = 0;
28142     clcpCompletedState = LCP_IDLE;
28143     csrExecUndoLogState = EULS_IDLE;
28144     c_lcpId = (Uint32)~0; // Initialise to LCP id never used
28145     c_restart_lcpId = 0;
28146     c_restart_localLcpId = 0;
28147     c_restart_maxLcpId = 0;
28148     c_restart_maxLocalLcpId = 0;
28149     cnoOfFragsCheckpointed = 0;
28150     break;
28151   case 1:
28152     jam();
28153     initialiseAddfragrec(signal);
28154     break;
28155   case 2:
28156     jam();
28157     /* Unused */
28158     break;
28159   case 3:
28160     jam();
28161     /* Unused */
28162     break;
28163   case 4:
28164     jam();
28165     initialiseFragrec(signal);
28166     break;
28167   case 5:
28168     jam();
28169     initialiseGcprec(signal);
28170     initialiseLcpRec(signal);
28171     break;
28172   case 6:
28173     jam();
28174     initialiseLogPage(signal);
28175     break;
28176   case 7:
28177     jam();
28178     initialiseLfo(signal);
28179     break;
28180   case 8:
28181     jam();
28182     initialiseLogFile(signal);
28183     initialiseLogPart(signal);
28184     break;
28185   case 9:
28186     jam();
28187     initialisePageRef(signal);
28188     break;
28189   case 10:
28190     jam();
28191     initialiseScanrec(signal);
28192     break;
28193   case 11:
28194     jam();
28195     initialiseTabrec(signal);
28196     break;
28197   case 12:
28198     jam();
28199     initialiseTcNodeFailRec(signal);
28200     {
28201       ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
28202       conf->senderRef = reference();
28203       conf->senderData = retData;
28204       sendSignal(retRef, GSN_READ_CONFIG_CONF, signal,
28205 		 ReadConfigConf::SignalLength, JBB);
28206     }
28207     return;
28208     break;
28209   default:
28210     ndbabort();
28211   }//switch
28212 
28213   signal->theData[0] = ZINITIALISE_RECORDS;
28214   signal->theData[1] = data + 1;
28215   signal->theData[2] = 0;
28216   signal->theData[3] = retRef;
28217   signal->theData[4] = retData;
28218   sendSignal(reference(), GSN_CONTINUEB, signal, 5, JBB);
28219 
28220   return;
28221 }//Dblqh::initialiseRecordsLab()
28222 
28223 /* ==========================================================================
 * =======                      INITIATE SCAN RECORD                  =======
28225  *
28226  * ========================================================================= */
initialiseScanrec(Signal * signal)28227 void Dblqh::initialiseScanrec(Signal* signal)
28228 {
28229   jamDebug();
28230 
28231   /**
28232    * just seize records from pool and put into
28233    *   dedicated list
28234    *
28235    * We need to allocate an ACC pointer list that fits
28236    * all reserved since we can use the LCP record for NR or Backup and
28237    * vice versa for NR scans and Backup scans.
28238    * We mark as reserved afterwards as there should be no further seizing
28239    * of segments for acc_ptrs, and this is checked.
28240    */
28241   ndbrequire(c_scanRecordPool.seize(scanptr));
28242   m_reserved_scans.addFirst(scanptr); //LCP
28243   ndbrequire(scanptr.i == 0);
28244   ndbrequire(seize_acc_ptr_list(scanptr.p, 0, ZRESERVED_SCAN_BATCH_SIZE));
28245   scanptr.p->m_reserved = 1;
28246 
28247   ndbrequire(c_scanRecordPool.seize(scanptr));
28248   m_reserved_scans.addFirst(scanptr); //NR
28249   ndbrequire(scanptr.i == 1);
28250   ndbrequire(seize_acc_ptr_list(scanptr.p, 0, ZRESERVED_SCAN_BATCH_SIZE));
28251   scanptr.p->m_reserved = 1;
28252 
28253   ndbrequire(c_scanRecordPool.seize(scanptr));
28254   m_reserved_scans.addFirst(scanptr); //Backup
28255   ndbrequire(scanptr.i == 2);
28256   ndbrequire(seize_acc_ptr_list(scanptr.p, 0, ZRESERVED_SCAN_BATCH_SIZE));
28257   scanptr.p->m_reserved = 1;
28258 }//Dblqh::initialiseScanrec()
28259 
28260 /* ==========================================================================
28261  * =======                      INITIATE TABLE RECORD                 =======
28262  *
28263  * ========================================================================= */
initialiseTabrec(Signal * signal)28264 void Dblqh::initialiseTabrec(Signal* signal)
28265 {
28266   if (ctabrecFileSize != 0) {
28267     for (tabptr.i = 0; tabptr.i < ctabrecFileSize; tabptr.i++) {
28268       refresh_watch_dog();
28269       ptrAss(tabptr, tablerec);
28270       tabptr.p->tableStatus = Tablerec::NOT_DEFINED;
28271       tabptr.p->usageCountR = 0;
28272       tabptr.p->usageCountW = 0;
28273       tabptr.p->m_addfragptr_i = RNIL;
28274       for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tabptr.p->fragid); i++) {
28275         tabptr.p->fragid[i] = ZNIL;
28276         tabptr.p->fragrec[i] = RNIL;
28277       }//for
28278     }//for
28279   }//if
28280 }//Dblqh::initialiseTabrec()
28281 
28282 /* ==========================================================================
 * =======                      INITIATE TC NODE FAIL RECORD          =======
28284  *
28285  * ========================================================================= */
initialiseTcNodeFailRec(Signal * signal)28286 void Dblqh::initialiseTcNodeFailRec(Signal* signal)
28287 {
28288   TcNodeFailRecordPtr tcNodeFailPtr;
28289   if (ctcNodeFailrecFileSize != 0) {
28290     for (tcNodeFailPtr.i = 0;
28291 	 tcNodeFailPtr.i < ctcNodeFailrecFileSize;
28292 	 tcNodeFailPtr.i++) {
28293       ptrAss(tcNodeFailPtr, tcNodeFailRecord);
28294       tcNodeFailPtr.p->tcFailStatus = TcNodeFailRecord::TC_STATE_FALSE;
28295     }//for
28296   }//if
28297 }//Dblqh::initialiseTcNodeFailRec()
28298 
28299 /* ==========================================================================
28300  * =======              INITIATE FRAGMENT RECORD                      =======
28301  *
28302  *       SUBROUTINE SHORT NAME = IF
28303  * ========================================================================= */
initFragrec(Signal * signal,Uint32 tableId,Uint32 fragId,Uint32 copyType)28304 void Dblqh::initFragrec(Signal* signal,
28305                         Uint32 tableId,
28306                         Uint32 fragId,
28307                         Uint32 copyType)
28308 {
28309   new (fragptr.p) Fragrecord();
28310   fragptr.p->m_scanNumberMask.set(); // All is free
28311   fragptr.p->accBlockref = caccBlockref;
28312   fragptr.p->fragStatus = Fragrecord::DEFINED;
28313   fragptr.p->fragCopy = copyType;
28314   fragptr.p->tupBlockref = ctupBlockref;
28315   fragptr.p->tuxBlockref = ctuxBlockref;
28316   fragptr.p->logFlag = Fragrecord::STATE_TRUE;
28317   fragptr.p->lcpFlag = Fragrecord::LCP_STATE_TRUE;
28318   for (Uint32 i = 0; i < MAX_LCP_STORED; i++) {
28319     fragptr.p->lcpId[i] = 0;
28320   }//for
28321   fragptr.p->maxGciCompletedInLcp = 0;
28322   fragptr.p->accFragptr = RNIL;
28323   fragptr.p->maxGciInLcp = 0;
28324   fragptr.p->newestGci = cnewestGci;
28325   fragptr.p->tabRef = tableId;
28326   fragptr.p->fragId = fragId;
28327   fragptr.p->srStatus = Fragrecord::SS_IDLE;
28328   fragptr.p->execSrStatus = Fragrecord::IDLE;
28329   fragptr.p->execSrNoReplicas = 0;
28330   fragptr.p->fragDistributionKey = 0;
28331   fragptr.p->activeTcCounter = 0;
28332   fragptr.p->tableFragptr = RNIL;
28333   fragptr.p->m_copy_started_state = Fragrecord::AC_NORMAL;
28334   fragptr.p->lcp_frag_ord_state = Fragrecord::LCP_EXECUTED;
28335   fragptr.p->m_create_table_flag_lcp_frag_ord = false;
28336   fragptr.p->m_create_table_insert_lcp = false;
28337   fragptr.p->lcp_frag_ord_lcp_no = 0;
28338   fragptr.p->lcp_frag_ord_lcp_id = 0;
28339   fragptr.p->m_completed_gci = 0;
28340   fragptr.p->m_copy_complete_flag = 0;
28341   fragptr.p->m_local_lcp_instance_started = 0;
28342 }//Dblqh::initFragrec()
28343 
28344 /* ==========================================================================
28345  * =======       INITIATE FRAGMENT RECORD FOR SYSTEM RESTART          =======
28346  *
28347  *       SUBROUTINE SHORT NAME = IFS
28348  * ========================================================================= */
28349 
28350 /* ==========================================================================
28351  * =======       INITIATE INFORMATION ABOUT GLOBAL CHECKPOINTS        =======
28352  *               IN LOG FILE RECORDS
28353  *
28354  *       INPUT:     LOG_FILE_PTR            CURRENT LOG FILE
28355  *                  TNO_FD_DESCRIPTORS      THE NUMBER OF FILE DESCRIPTORS
28356  *                                          TO READ FROM THE LOG PAGE
28357  *                  LOG_PAGE_PTR            PAGE ZERO IN LOG FILE
28358  *       SUBROUTINE SHORT NAME = IGL
28359  * ========================================================================= */
initGciInLogFileRec(Signal * signal,Uint32 noFdDescriptors)28360 void Dblqh::initGciInLogFileRec(Signal* signal, Uint32 noFdDescriptors)
28361 {
28362   /* We are reading the per file:mb metadata from page zero in this file
28363    * We cannot use the data for this file (fd 0), but the data for
28364    * previous files is valid.
28365    * So we start reading at fd 1.
28366    * The metadata for this file (fd 0) is set either reading the next file,
28367    * or by probing the last megabytes.
28368    */
28369   LogFileRecordPtr filePtr = logFilePtr;
28370   Uint32 pos = ZPAGE_HEADER_SIZE + ZFD_HEADER_SIZE;
28371   ndbrequire(noFdDescriptors <= cmaxValidLogFilesInPageZero);
28372 
28373   /* We start by initialising the previous file's metadata,
28374    * so lets move there now...
28375    */
28376   filePtr.i = filePtr.p->prevLogFile;
28377   ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord);
28378 
28379 
28380   for (Uint32 fd = 1; fd <= noFdDescriptors; fd++)
28381   {
28382     jam();
28383     for (Uint32 mb = 0; mb < clogFileSize; mb++)
28384     {
28385       jam();
28386       Uint32 pos0 = pos + fd * (ZFD_MBYTE_SIZE * clogFileSize) + mb;
28387       Uint32 pos1 = pos0 + clogFileSize;
28388       Uint32 pos2 = pos1 + clogFileSize;
28389       arrGuard(pos0, ZPAGE_SIZE);
28390       arrGuard(pos1, ZPAGE_SIZE);
28391       arrGuard(pos2, ZPAGE_SIZE);
28392       filePtr.p->logMaxGciCompleted[mb] = logPagePtr.p->logPageWord[pos0];
28393       filePtr.p->logMaxGciStarted[mb] = logPagePtr.p->logPageWord[pos1];
28394       filePtr.p->logLastPrepRef[mb] = logPagePtr.p->logPageWord[pos2];
28395     }
28396     if (fd + 1 <= noFdDescriptors)
28397     {
28398       jam();
28399       filePtr.i = filePtr.p->prevLogFile;
28400       ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord);
28401     }
28402   }
28403 }//Dblqh::initGciInLogFileRec()
28404 
28405 /* ==========================================================================
28406  * =======              INITIATE LOG PART                             =======
28407  *
28408  * ========================================================================= */
initLogpart(Signal * signal)28409 void Dblqh::initLogpart(Signal* signal)
28410 {
28411   logPartPtr.p->m_total_written_words = Uint64(0);
28412   logPartPtr.p->m_last_total_written_words = Uint64(0);
28413   logPartPtr.p->execSrLogPage = RNIL;
28414   logPartPtr.p->execSrLogPageIndex = ZNIL;
28415   logPartPtr.p->execSrExecuteIndex = 0;
28416   logPartPtr.p->noLogFiles = cnoLogFiles;
28417   logPartPtr.p->logLap = 0;
28418   logPartPtr.p->logTailFileNo = 0;
28419   logPartPtr.p->logTailMbyte = 0;
28420   logPartPtr.p->lastMbyte = ZNIL;
28421   logPartPtr.p->logPartState = LogPartRecord::SR_FIRST_PHASE;
28422   logPartPtr.p->logExecState = LogPartRecord::LES_IDLE;
28423   logPartPtr.p->firstLogTcrec = RNIL;
28424   logPartPtr.p->lastLogTcrec = RNIL;
28425   logPartPtr.p->gcprec = RNIL;
28426   logPartPtr.p->firstPageRef = RNIL;
28427   logPartPtr.p->lastPageRef = RNIL;
28428   logPartPtr.p->headFileNo = ZNIL;
28429   logPartPtr.p->headPageNo = ZNIL;
28430   logPartPtr.p->headPageIndex = ZNIL;
28431   logPartPtr.p->firstInvalidatePageNo = ZNIL;
28432   logPartPtr.p->firstInvalidateFileNo = ZNIL;
28433   logPartPtr.p->endInvalidMByteSearch = ZNIL;
28434   logPartPtr.p->firstInvalidatePageFound = false;
28435   logPartPtr.p->m_log_problems = 0;
28436   NdbLogPartInfo lpinfo(instance());
28437   ndbrequire(lpinfo.partCount == clogPartFileSize);
28438   logPartPtr.p->logPartNo = lpinfo.partNo[logPartPtr.i];
28439   logPartPtr.p->m_io_tracker.init(logPartPtr.p->logPartNo);
28440   logPartPtr.p->m_log_prepare_queue.init();
28441   logPartPtr.p->m_log_complete_queue.init();
28442   logPartPtr.p->m_committed_words = 0;
28443 }//Dblqh::initLogpart()
28444 
28445 /* ==========================================================================
28446  * =======              INITIATE LOG POINTERS                         =======
28447  *
28448  * ========================================================================= */
initLogPointers(Signal * signal,const TcConnectionrecPtr tcConnectptr)28449 void Dblqh::initLogPointers(Signal* signal,
28450                             const TcConnectionrecPtr tcConnectptr)
28451 {
28452   logPartPtr.i = tcConnectptr.p->m_log_part_ptr_i;
28453   ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
28454   logFilePtr.i = logPartPtr.p->currentLogfile;
28455   ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
28456   logPagePtr.i = logFilePtr.p->currentLogpage;
28457   ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
28458 }//Dblqh::initLogPointers()
28459 
28460 /* ------------------------------------------------------------------------- */
28461 /* -------    INIT REQUEST INFO BEFORE EXECUTING A LOG RECORD        ------- */
28462 /*                                                                           */
28463 /* ------------------------------------------------------------------------- */
initReqinfoExecSr(Signal * signal,const TcConnectionrecPtr tcConnectptr)28464 void Dblqh::initReqinfoExecSr(Signal* signal,
28465                               const TcConnectionrecPtr tcConnectptr)
28466 {
28467   UintR Treqinfo = 0;
28468   TcConnectionrec * const regTcPtr = tcConnectptr.p;
28469 /* ------------------------------------------------------------------------- */
28470 /* NUMBER OF BACKUPS AND STANDBYS ARE ZERO AND NEED NOT BE SET.              */
28471 /* REPLICA TYPE IS CLEARED BY SEND_LQHKEYREQ.                                */
28472 /* ------------------------------------------------------------------------- */
28473 /* ------------------------------------------------------------------------- */
28474 /*       SET LAST REPLICA NUMBER TO ZERO (BIT 10-11)                         */
28475 /* ------------------------------------------------------------------------- */
28476 /* ------------------------------------------------------------------------- */
28477 /*       SET DIRTY FLAG                                                      */
28478 /* ------------------------------------------------------------------------- */
28479   LqhKeyReq::setDirtyFlag(Treqinfo, 1);
28480 /* ------------------------------------------------------------------------- */
28481 /*       SET SIMPLE TRANSACTION                                              */
28482 /* ------------------------------------------------------------------------- */
28483   LqhKeyReq::setSimpleFlag(Treqinfo, 1);
28484   LqhKeyReq::setGCIFlag(Treqinfo, 1);
28485 /* ------------------------------------------------------------------------- */
28486 /* SET OPERATION TYPE AND LOCK MODE (NEVER READ OPERATION OR SCAN IN LOG)    */
28487 /* ------------------------------------------------------------------------- */
28488   LqhKeyReq::setOperation(Treqinfo, regTcPtr->operation);
28489   regTcPtr->reqinfo = Treqinfo;
28490 /* ------------------------------------------------------------------------ */
28491 /* NO OF BACKUP IS SET TO ONE AND NUMBER OF STANDBY NODES IS SET TO ZERO.   */
28492 /* THUS THE RECEIVING NODE WILL EXPECT THAT IT IS THE LAST NODE AND WILL    */
28493 /* SEND COMPLETED AS THE RESPONSE SIGNAL SINCE DIRTY_OP BIT IS SET.         */
28494 /* ------------------------------------------------------------------------ */
28495 /* ------------------------------------------------------------------------- */
28496 /*       SET REPLICA TYPE TO PRIMARY AND NUMBER OF REPLICA TO ONE            */
28497 /* ------------------------------------------------------------------------- */
28498   regTcPtr->lastReplicaNo = 0;
28499   regTcPtr->nextSeqNoReplica = 0;
28500   regTcPtr->opExec = 0;
28501   regTcPtr->readlenAi = 0;
28502   regTcPtr->nodeAfterNext[0] = ZNIL;
28503   regTcPtr->nodeAfterNext[1] = ZNIL;
28504   regTcPtr->dirtyOp = ZFALSE;
28505   regTcPtr->tcBlockref = cownref;
28506   regTcPtr->m_reorg = ScanFragReq::REORG_ALL;
28507   regTcPtr->m_dealloc_state = TcConnectionrec::DA_IDLE;
28508   regTcPtr->m_dealloc_data.m_unused = RNIL;
28509   regTcPtr->indTakeOver = ZFALSE;
28510   regTcPtr->m_flags = 0;
28511 }//Dblqh::initReqinfoExecSr()
28512 
28513 /* --------------------------------------------------------------------------
28514  * -------               INSERT FRAGMENT                              -------
28515  *
28516  * ------------------------------------------------------------------------- */
insertFragrec(Signal * signal,Uint32 fragId)28517 bool Dblqh::insertFragrec(Signal* signal, Uint32 fragId)
28518 {
28519   terrorCode = ZOK;
28520   if(c_fragment_pool.seize(fragptr) == false)
28521   {
28522     terrorCode = ZNO_FREE_FRAGMENTREC;
28523     return false;
28524   }
28525   ndbrequire(fragptr.p->fragStatus == Fragrecord::FREE);
28526   for (Uint32 i = 0; i < NDB_ARRAY_SIZE(tabptr.p->fragid); i++) {
28527     jam();
28528     if (tabptr.p->fragid[i] == ZNIL) {
28529       jam();
28530       tabptr.p->fragid[i] = fragId;
28531       tabptr.p->fragrec[i] = fragptr.i;
28532       return true;
28533     }//if
28534   }//for
28535   c_fragment_pool.release(fragptr);
28536   terrorCode = ZTOO_MANY_FRAGMENTS;
28537   return false;
28538 }//Dblqh::insertFragrec()
28539 
28540 /* -------------------------------------------------------------------------
28541  * -------               LINK OPERATION INTO WAITING FOR LOGGING     -------
28542  *
28543  *       SUBROUTINE SHORT NAME = LWL
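 *       Input parameters:
 *       regLogPartPtr   log part record that is marked ACTIVE by this routine
 *       queue           operation queue the operation is appended to
 *       tcConnectptr    operation record being appended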
28547  * ------------------------------------------------------------------------- */
28548 void
linkWaitLog(Signal * signal,LogPartRecordPtr regLogPartPtr,LogPartRecord::OperationQueue & queue,const TcConnectionrecPtr tcConnectptr)28549 Dblqh::linkWaitLog(Signal* signal,
28550                    LogPartRecordPtr regLogPartPtr,
28551                    LogPartRecord::OperationQueue & queue,
28552                    const TcConnectionrecPtr tcConnectptr)
28553 {
28554   TcConnectionrecPtr lwlTcConnectptr;
28555 /* -------------------------------------------------- */
28556 /*       LINK ACTIVE OPERATION INTO QUEUE WAITING FOR */
28557 /*       ACCESS TO THE LOG PART.                      */
28558 /* -------------------------------------------------- */
28559   lwlTcConnectptr.i = queue.lastElement;
28560   if (lwlTcConnectptr.i == RNIL) {
28561     jam();
28562     queue.firstElement = tcConnectptr.i;
28563     tcConnectptr.p->prevTcLogQueue = RNIL;
28564   } else {
28565     jam();
28566     ndbrequire(tcConnect_pool.getValidPtr(lwlTcConnectptr));
28567     lwlTcConnectptr.p->nextTcLogQueue = tcConnectptr.i;
28568     tcConnectptr.p->prevTcLogQueue = lwlTcConnectptr.i;
28569   }//if
28570   queue.lastElement = tcConnectptr.i;
28571   tcConnectptr.p->nextTcLogQueue = RNIL;
28572   regLogPartPtr.p->logPartState = LogPartRecord::ACTIVE;
28573   if (regLogPartPtr.p->LogLqhKeyReqSent == ZFALSE)
28574   {
28575     jam();
28576     regLogPartPtr.p->LogLqhKeyReqSent = ZTRUE;
28577     signal->theData[0] = ZLOG_LQHKEYREQ;
28578     signal->theData[1] = regLogPartPtr.i;
28579     sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
28580   }//if
28581 }//Dblqh::linkWaitLog()
28582 
28583 /* --------------------------------------------------------------------------
28584  * -------          START THE NEXT OPERATION ON THIS LOG PART IF ANY  -------
28585  * -------               OPERATIONS ARE QUEUED.                       -------
28586  *
28587  *       SUBROUTINE SHORT NAME = LNS
28588 // Input Pointers:
28589 // tcConnectptr
28590 // logPartPtr
28591  * ------------------------------------------------------------------------- */
logNextStart(Signal * signal)28592 void Dblqh::logNextStart(Signal* signal)
28593 {
28594   LogPartRecordPtr lnsLogPartPtr;
28595   UintR tlnsStillWaiting;
28596   LogPartRecord * const regLogPartPtr = logPartPtr.p;
28597 
28598   if (regLogPartPtr->m_log_prepare_queue.isEmpty() &&
28599       regLogPartPtr->m_log_complete_queue.isEmpty() &&
28600       (regLogPartPtr->waitWriteGciLog != LogPartRecord::WWGL_TRUE))
28601   {
28602 // --------------------------------------------------------------------------
28603 // Optimised route for the common case
28604 // --------------------------------------------------------------------------
28605     return;
28606   }//if
28607 
28608   if (!regLogPartPtr->m_log_prepare_queue.isEmpty() ||
28609       !regLogPartPtr->m_log_complete_queue.isEmpty())
28610   {
28611     jam();
28612     regLogPartPtr->logPartState = LogPartRecord::ACTIVE;
28613     if (regLogPartPtr->LogLqhKeyReqSent == ZFALSE)
28614     {
28615       jam();
28616       regLogPartPtr->LogLqhKeyReqSent = ZTRUE;
28617       signal->theData[0] = ZLOG_LQHKEYREQ;
28618       signal->theData[1] = logPartPtr.i;
28619       sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
28620     }//if
28621   }
28622 
28623   if (regLogPartPtr->waitWriteGciLog != LogPartRecord::WWGL_TRUE)
28624   {
28625     jam();
28626     return;
28627   }
28628   else
28629   {
28630     jam();
28631 /* --------------------------------------------------------------------------
28632  *   A COMPLETE GCI LOG RECORD IS WAITING TO BE WRITTEN. WE GIVE THIS HIGHEST
28633  *   PRIORITY AND WRITE IT IMMEDIATELY. AFTER WRITING IT WE CHECK IF ANY MORE
28634  *   LOG PARTS ARE WAITING. IF NOT WE SEND A SIGNAL THAT INITIALISES THE GCP
28635  *   RECORD TO WAIT UNTIL ALL COMPLETE GCI LOG RECORDS HAVE REACHED TO DISK.
28636  * -------------------------------------------------------------------------- */
28637     writeCompletedGciLog(signal);
28638     logPartPtr.p->waitWriteGciLog = LogPartRecord::WWGL_FALSE;
28639     tlnsStillWaiting = ZFALSE;
28640     for (lnsLogPartPtr.i = 0; lnsLogPartPtr.i < clogPartFileSize; lnsLogPartPtr.i++) {
28641       jam();
28642       ptrAss(lnsLogPartPtr, logPartRecord);
28643       if (lnsLogPartPtr.p->waitWriteGciLog == LogPartRecord::WWGL_TRUE) {
28644         jam();
28645         tlnsStillWaiting = ZTRUE;
28646       }//if
28647     }//for
28648     if (tlnsStillWaiting == ZFALSE) {
28649       jam();
28650       signal->theData[0] = ZINIT_GCP_REC;
28651       sendSignal(cownref, GSN_CONTINUEB, signal, 1, JBB);
28652     }//if
28653   }//if
28654 }//Dblqh::logNextStart()
28655 
28656 /* --------------------------------------------------------------------------
28657  * -------       MOVE PAGES FROM LFO RECORD TO PAGE REFERENCE RECORD  -------
28658  *               WILL ALWAYS MOVE 8 PAGES TO A PAGE REFERENCE RECORD.
28659  *
28660  *       SUBROUTINE SHORT NAME = MPR
28661  * ------------------------------------------------------------------------- */
moveToPageRef(Signal * signal)28662 void Dblqh::moveToPageRef(Signal* signal)
28663 {
28664   LogPageRecordPtr mprLogPagePtr;
28665   PageRefRecordPtr mprPageRefPtr;
28666   UintR tmprIndex;
28667 
28668 /* --------------------------------------------------------------------------
28669  * -------       INSERT PAGE REFERENCE RECORD                         -------
28670  *
28671  *       INPUT:  LFO_PTR         LOG FILE OPERATION RECORD
28672  *               LOG_PART_PTR    LOG PART RECORD
28673  *               PAGE_REF_PTR    THE PAGE REFERENCE RECORD TO BE INSERTED.
28674  * ------------------------------------------------------------------------- */
28675   PageRefRecordPtr iprPageRefPtr;
28676 
28677   if ((logPartPtr.p->mmBufferSize + 8) >= ZMAX_MM_BUFFER_SIZE) {
28678     jam();
28679     pageRefPtr.i = logPartPtr.p->firstPageRef;
28680     ptrCheckGuard(pageRefPtr, cpageRefFileSize, pageRefRecord);
28681     releasePrPages(signal);
28682     removePageRef(signal);
28683   } else {
28684     jam();
28685     logPartPtr.p->mmBufferSize = logPartPtr.p->mmBufferSize + 8;
28686   }//if
28687   seizePageRef(signal);
28688   if (logPartPtr.p->firstPageRef == RNIL) {
28689     jam();
28690     logPartPtr.p->firstPageRef = pageRefPtr.i;
28691   } else {
28692     jam();
28693     iprPageRefPtr.i = logPartPtr.p->lastPageRef;
28694     ptrCheckGuard(iprPageRefPtr, cpageRefFileSize, pageRefRecord);
28695     iprPageRefPtr.p->prNext = pageRefPtr.i;
28696   }//if
28697   pageRefPtr.p->prPrev = logPartPtr.p->lastPageRef;
28698   logPartPtr.p->lastPageRef = pageRefPtr.i;
28699 
28700   pageRefPtr.p->prFileNo = logFilePtr.p->fileNo;
28701   pageRefPtr.p->prPageNo = lfoPtr.p->lfoPageNo;
28702   tmprIndex = 0;
28703   mprLogPagePtr.i = lfoPtr.p->firstLfoPage;
28704 MPR_LOOP:
28705   arrGuard(tmprIndex, 8);
28706   pageRefPtr.p->pageRef[tmprIndex] = mprLogPagePtr.i;
28707   tmprIndex = tmprIndex + 1;
28708   ptrCheckGuard(mprLogPagePtr, clogPageFileSize, logPageRecord);
28709   mprLogPagePtr.i = mprLogPagePtr.p->logPageWord[ZNEXT_PAGE];
28710   if (mprLogPagePtr.i != RNIL) {
28711     jam();
28712     goto MPR_LOOP;
28713   }//if
28714   mprPageRefPtr.i = pageRefPtr.p->prPrev;
28715   if (mprPageRefPtr.i != RNIL) {
28716     jam();
28717     ptrCheckGuard(mprPageRefPtr, cpageRefFileSize, pageRefRecord);
28718     mprLogPagePtr.i = mprPageRefPtr.p->pageRef[7];
28719     ptrCheckGuard(mprLogPagePtr, clogPageFileSize, logPageRecord);
28720     mprLogPagePtr.p->logPageWord[ZNEXT_PAGE] = pageRefPtr.p->pageRef[0];
28721   }//if
28722 }//Dblqh::moveToPageRef()
28723 
28724 /* ------------------------------------------------------------------------- */
28725 /* -------               READ THE ATTRINFO FROM THE LOG              ------- */
28726 /*                                                                           */
28727 /*       SUBROUTINE SHORT NAME = RA                                          */
28728 /* ------------------------------------------------------------------------- */
readAttrinfo(Signal * signal,const TcConnectionrecPtr tcConnectptr)28729 void Dblqh::readAttrinfo(Signal* signal, const TcConnectionrecPtr tcConnectptr)
28730 {
28731   Uint32 remainingLen = tcConnectptr.p->totSendlenAi;
28732   tcConnectptr.p->reclenAiLqhkey = 0;
28733   if (remainingLen == 0) {
28734     jam();
28735     return;
28736   }//if
28737 
28738   readLogData(signal, remainingLen, tcConnectptr.p->attrInfoIVal);
28739 }//Dblqh::readAttrinfo()
28740 
28741 /* ------------------------------------------------------------------------- */
28742 /* -------               READ COMMIT LOG                             ------- */
28743 /*                                                                           */
28744 /*       SUBROUTINE SHORT NAME = RCL                                         */
28745 /* ------------------------------------------------------------------------- */
readCommitLog(Signal * signal,CommitLogRecord * commitLogRecord,const TcConnectionrecPtr tcConnectptr)28746 void Dblqh::readCommitLog(Signal* signal,
28747                           CommitLogRecord* commitLogRecord,
28748                           const TcConnectionrecPtr tcConnectptr)
28749 {
28750   Uint32 trclPageIndex = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
28751   if ((trclPageIndex + (ZCOMMIT_LOG_SIZE - 1)) < ZPAGE_SIZE) {
28752     jam();
28753     tcConnectptr.p->tableref = logPagePtr.p->logPageWord[trclPageIndex + 0];
28754     tcConnectptr.p->schemaVersion = logPagePtr.p->logPageWord[trclPageIndex + 1];
28755     tcConnectptr.p->fragmentid = logPagePtr.p->logPageWord[trclPageIndex + 2];
28756     commitLogRecord->fileNo = logPagePtr.p->logPageWord[trclPageIndex + 3];
28757     commitLogRecord->startPageNo = logPagePtr.p->logPageWord[trclPageIndex + 4];
28758     commitLogRecord->startPageIndex = logPagePtr.p->logPageWord[trclPageIndex + 5];
28759     commitLogRecord->stopPageNo = logPagePtr.p->logPageWord[trclPageIndex + 6];
28760     tcConnectptr.p->gci_hi = logPagePtr.p->logPageWord[trclPageIndex + 7];
28761     tcConnectptr.p->gci_lo = 0;
28762     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] =
28763                             (trclPageIndex + ZCOMMIT_LOG_SIZE) - 1;
28764   } else {
28765     jam();
28766     tcConnectptr.p->tableref = readLogword(signal);
28767     tcConnectptr.p->schemaVersion = readLogword(signal);
28768     tcConnectptr.p->fragmentid = readLogword(signal);
28769     commitLogRecord->fileNo = readLogword(signal);
28770     commitLogRecord->startPageNo = readLogword(signal);
28771     commitLogRecord->startPageIndex = readLogword(signal);
28772     commitLogRecord->stopPageNo = readLogword(signal);
28773     tcConnectptr.p->gci_hi = readLogword(signal);
28774     tcConnectptr.p->gci_lo = 0;
28775   }//if
28776   tcConnectptr.p->transid[0] = logPartPtr.i + 65536;
28777   tcConnectptr.p->transid[1] = (DBLQH << 20) + (cownNodeid << 8);
28778 }//Dblqh::readCommitLog()
28779 
28780 /* ------------------------------------------------------------------------- */
28781 /* -------        READ LOG PAGES FROM DISK IN ORDER TO EXECUTE A LOG ------- */
28782 /*                RECORD WHICH WAS NOT FOUND IN MAIN MEMORY.                 */
28783 /*                                                                           */
28784 /*       SUBROUTINE SHORT NAME = REL                                         */
28785 /* ------------------------------------------------------------------------- */
readExecLog(Signal * signal)28786 void Dblqh::readExecLog(Signal* signal)
28787 {
28788   UintR trelIndex;
28789   UintR trelI;
28790 
28791   seizeLfo(signal);
28792   initLfo(signal);
28793   trelI = logPartPtr.p->execSrStopPageNo - logPartPtr.p->execSrStartPageNo;
28794   arrGuard(trelI + 1, 16);
28795   lfoPtr.p->logPageArray[trelI + 1] = logPartPtr.p->execSrStartPageNo;
28796   for (trelIndex = logPartPtr.p->execSrStopPageNo; (trelIndex >= logPartPtr.p->execSrStartPageNo) &&
28797        (UintR)~trelIndex; trelIndex--) {
28798     jam();
28799     seizeLogpage(signal);
28800     arrGuard(trelI, 16);
28801     lfoPtr.p->logPageArray[trelI] = logPagePtr.i;
28802     trelI--;
28803   }//for
28804   lfoPtr.p->lfoPageNo = logPartPtr.p->execSrStartPageNo;
28805   lfoPtr.p->noPagesRw = (logPartPtr.p->execSrStopPageNo -
28806 			 logPartPtr.p->execSrStartPageNo) + 1;
28807   lfoPtr.p->firstLfoPage = lfoPtr.p->logPageArray[0];
28808   signal->theData[0] = logFilePtr.p->fileRef;
28809   signal->theData[1] = cownref;
28810   signal->theData[2] = lfoPtr.i;
28811   signal->theData[3] = ZLIST_OF_MEM_PAGES; // edtjamo TR509 //ZLIST_OF_PAIRS;
28812   signal->theData[4] = ZVAR_NO_LOG_PAGE_WORD;
28813   signal->theData[5] = lfoPtr.p->noPagesRw;
28814   signal->theData[6] = lfoPtr.p->logPageArray[0];
28815   signal->theData[7] = lfoPtr.p->logPageArray[1];
28816   signal->theData[8] = lfoPtr.p->logPageArray[2];
28817   signal->theData[9] = lfoPtr.p->logPageArray[3];
28818   signal->theData[10] = lfoPtr.p->logPageArray[4];
28819   signal->theData[11] = lfoPtr.p->logPageArray[5];
28820   signal->theData[12] = lfoPtr.p->logPageArray[6];
28821   signal->theData[13] = lfoPtr.p->logPageArray[7];
28822   signal->theData[14] = lfoPtr.p->logPageArray[8];
28823   signal->theData[15] = lfoPtr.p->logPageArray[9];
28824   sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 16, JBA);
28825 
28826   logPartPtr.p->m_redoWorkStats.m_pagesRead+= lfoPtr.p->noPagesRw;
28827 
28828 
28829   if (DEBUG_REDO)
28830   {
28831     ndbout_c("readExecLog %u page at part: %u file: %u page: %u (mb: %u)",
28832              lfoPtr.p->noPagesRw,
28833              logPartPtr.p->logPartNo,
28834              logFilePtr.p->fileNo,
28835              logPartPtr.p->execSrStartPageNo,
28836              logPartPtr.p->execSrStartPageNo >> ZTWOLOG_NO_PAGES_IN_MBYTE);
28837   }
28838 }//Dblqh::readExecLog()
28839 
28840 /* ------------------------------------------------------------------------- */
28841 /* -------        READ 64 KBYTES WHEN EXECUTING THE FRAGMENT LOG     ------- */
28842 /*                                                                           */
28843 /*       SUBROUTINE SHORT NAME = RES                                         */
28844 /* ------------------------------------------------------------------------- */
readExecSrNewMbyte(Signal * signal)28845 void Dblqh::readExecSrNewMbyte(Signal* signal)
28846 {
28847   logFilePtr.p->currentFilepage = logFilePtr.p->currentMbyte * ZPAGES_IN_MBYTE;
28848   logFilePtr.p->filePosition = logFilePtr.p->currentMbyte * ZPAGES_IN_MBYTE;
28849   logPartPtr.p->execSrPagesRead = 0;
28850   logPartPtr.p->execSrPagesReading = 0;
28851   logPartPtr.p->execSrPagesExecuted = 0;
28852   readExecSr(signal);
28853   logPartPtr.p->logExecState = LogPartRecord::LES_WAIT_READ_EXEC_SR_NEW_MBYTE;
28854 }//Dblqh::readExecSrNewMbyte()
28855 
28856 /* ------------------------------------------------------------------------- */
28857 /* -------        READ 64 KBYTES WHEN EXECUTING THE FRAGMENT LOG     ------- */
28858 /*                                                                           */
28859 /*       SUBROUTINE SHORT NAME = RES                                         */
28860 /* ------------------------------------------------------------------------- */
readExecSr(Signal * signal)28861 void Dblqh::readExecSr(Signal* signal)
28862 {
28863   UintR tresPageid;
28864   UintR tresIndex;
28865 
28866   tresPageid = logFilePtr.p->filePosition;
28867   seizeLfo(signal);
28868   initLfo(signal);
28869   for (tresIndex = 7; (UintR)~tresIndex; tresIndex--) {
28870     jam();
28871 /* ------------------------------------------------------------------------- */
28872 /* GO BACKWARDS SINCE WE INSERT AT THE BEGINNING AND WE WANT THAT FIRST PAGE */
28873 /* SHALL BE FIRST AND LAST PAGE LAST.                                        */
28874 /* ------------------------------------------------------------------------- */
28875     seizeLogpage(signal);
28876     lfoPtr.p->logPageArray[tresIndex] = logPagePtr.i;
28877   }//for
28878   lfoPtr.p->lfoState = LogFileOperationRecord::READ_EXEC_SR;
28879   lfoPtr.p->lfoPageNo = tresPageid;
28880   logFilePtr.p->filePosition = logFilePtr.p->filePosition + 8;
28881   logPartPtr.p->execSrPagesReading = logPartPtr.p->execSrPagesReading + 8;
28882   lfoPtr.p->noPagesRw = 8;
28883   lfoPtr.p->firstLfoPage = lfoPtr.p->logPageArray[0];
28884   signal->theData[0] = logFilePtr.p->fileRef;
28885   signal->theData[1] = cownref;
28886   signal->theData[2] = lfoPtr.i;
28887   signal->theData[3] = ZLIST_OF_MEM_PAGES;
28888   signal->theData[4] = ZVAR_NO_LOG_PAGE_WORD;
28889   signal->theData[5] = 8;
28890   signal->theData[6] = lfoPtr.p->logPageArray[0];
28891   signal->theData[7] = lfoPtr.p->logPageArray[1];
28892   signal->theData[8] = lfoPtr.p->logPageArray[2];
28893   signal->theData[9] = lfoPtr.p->logPageArray[3];
28894   signal->theData[10] = lfoPtr.p->logPageArray[4];
28895   signal->theData[11] = lfoPtr.p->logPageArray[5];
28896   signal->theData[12] = lfoPtr.p->logPageArray[6];
28897   signal->theData[13] = lfoPtr.p->logPageArray[7];
28898   signal->theData[14] = tresPageid;
28899   sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 15, JBA);
28900 
28901   logPartPtr.p->m_redoWorkStats.m_pagesRead +=8;
28902 
28903   if (DEBUG_REDO)
28904   {
28905     ndbout_c("readExecSr %u page at part: %u file: %u page: %u (mb: %u)",
28906              8,
28907              logPartPtr.p->logPartNo,
28908              logFilePtr.p->fileNo,
28909              tresPageid,
28910              tresPageid >> ZTWOLOG_NO_PAGES_IN_MBYTE);
28911   }
28912 }//Dblqh::readExecSr()
28913 
28914 /* ------------------------------------------------------------------------- */
28915 /* ------------ READ THE PRIMARY KEY FROM THE LOG           ---------------- */
28916 /*                                                                           */
28917 /*       SUBROUTINE SHORT NAME = RK                                          */
28918 /* --------------------------------------------------------------------------*/
readKey(Signal * signal,const TcConnectionrecPtr tcConnectptr)28919 void Dblqh::readKey(Signal* signal, const TcConnectionrecPtr tcConnectptr)
28920 {
28921   Uint32 remainingLen = tcConnectptr.p->primKeyLen;
28922   ndbrequire(remainingLen != 0);
28923 
28924   readLogData(signal, remainingLen, tcConnectptr.p->keyInfoIVal);
28925 }//Dblqh::readKey()
28926 
28927 /* ------------------------------------------------------------------------- */
28928 /* ------------ READ A NUMBER OF WORDS FROM LOG INTO CDATA  ---------------- */
28929 /*                                                                           */
28930 /*       SUBROUTINE SHORT NAME = RLD                                         */
28931 /* --------------------------------------------------------------------------*/
readLogData(Signal * signal,Uint32 noOfWords,Uint32 & sectionIVal)28932 void Dblqh::readLogData(Signal* signal, Uint32 noOfWords, Uint32& sectionIVal)
28933 {
28934   Uint32 logPos = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
28935   if ((logPos + noOfWords) >= ZPAGE_SIZE) {
28936     for (Uint32 i = 0; i < noOfWords; i++)
28937     {
28938       /* Todo : Consider reading > 1 word at a time */
28939       Uint32 word= readLogwordExec(signal);
28940       bool ok= appendToSection(sectionIVal,
28941                                &word,
28942                                1);
28943       ndbrequire(ok);
28944     }
28945   } else {
28946     /* In one bite */
28947     bool ok= appendToSection(sectionIVal,
28948                              &logPagePtr.p->logPageWord[logPos],
28949                              noOfWords);
28950     ndbrequire(ok);
28951     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPos + noOfWords;
28952   }//if
28953 }//Dblqh::readLogData()
28954 
28955 /* ------------------------------------------------------------------------- */
28956 /* ------------ READ THE LOG HEADER OF A PREPARE LOG HEADER ---------------- */
28957 /*                                                                           */
28958 /*       SUBROUTINE SHORT NAME = RLH                                         */
28959 /* --------------------------------------------------------------------------*/
readLogHeader(Signal * signal,const TcConnectionrecPtr tcConnectptr)28960 void Dblqh::readLogHeader(Signal* signal,
28961                           const TcConnectionrecPtr tcConnectptr)
28962 {
28963   Uint32 logPos = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
28964   if ((logPos + ZLOG_HEAD_SIZE) < ZPAGE_SIZE) {
28965     jam();
28966     tcConnectptr.p->hashValue = logPagePtr.p->logPageWord[logPos + 2];
28967     tcConnectptr.p->operation = logPagePtr.p->logPageWord[logPos + 3];
28968     tcConnectptr.p->totSendlenAi = logPagePtr.p->logPageWord[logPos + 4];
28969     tcConnectptr.p->primKeyLen = logPagePtr.p->logPageWord[logPos + 5];
28970     tcConnectptr.p->m_row_id.m_page_no = logPagePtr.p->logPageWord[logPos + 6];
28971     tcConnectptr.p->m_row_id.m_page_idx = logPagePtr.p->logPageWord[logPos+ 7];
28972     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPos + ZLOG_HEAD_SIZE;
28973   } else {
28974     jam();
28975     readLogwordExec(signal);	/* IGNORE PREPARE LOG RECORD TYPE */
28976     readLogwordExec(signal);	/* IGNORE LOG RECORD SIZE         */
28977     tcConnectptr.p->hashValue = readLogwordExec(signal);
28978     tcConnectptr.p->operation = readLogwordExec(signal);
28979     tcConnectptr.p->totSendlenAi = readLogwordExec(signal);
28980     tcConnectptr.p->primKeyLen = readLogwordExec(signal);
28981     tcConnectptr.p->m_row_id.m_page_no = readLogwordExec(signal);
28982     tcConnectptr.p->m_row_id.m_page_idx = readLogwordExec(signal);
28983   }//if
28984 
28985   tcConnectptr.p->m_use_rowid = (tcConnectptr.p->operation == ZINSERT);
28986 }//Dblqh::readLogHeader()
28987 
28988 /* ------------------------------------------------------------------------- */
28989 /* -------               READ A WORD FROM THE LOG                    ------- */
28990 /*                                                                           */
28991 /*       OUTPUT:         TLOG_WORD                                           */
28992 /*       SUBROUTINE SHORT NAME = RLW                                         */
28993 /* ------------------------------------------------------------------------- */
readLogword(Signal * signal)28994 Uint32 Dblqh::readLogword(Signal* signal)
28995 {
28996   Uint32 logPos = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
28997   ndbrequire(logPos < ZPAGE_SIZE);
28998   Uint32 logWord = logPagePtr.p->logPageWord[logPos];
28999   logPos++;
29000   logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPos;
29001   if (logPos >= ZPAGE_SIZE) {
29002     jam();
29003     logPagePtr.i = logPagePtr.p->logPageWord[ZNEXT_PAGE];
29004     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
29005     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = ZPAGE_HEADER_SIZE;
29006     logFilePtr.p->currentLogpage = logPagePtr.i;
29007     logFilePtr.p->currentFilepage++;
29008     logPartPtr.p->execSrPagesRead--;
29009     logPartPtr.p->execSrPagesExecuted++;
29010   }//if
29011   return logWord;
29012 }//Dblqh::readLogword()
29013 
29014 /* ------------------------------------------------------------------------- */
29015 /* -------   READ A WORD FROM THE LOG WHEN EXECUTING A LOG RECORD    ------- */
29016 /*                                                                           */
29017 /*       OUTPUT:         TLOG_WORD                                           */
29018 /*       SUBROUTINE SHORT NAME = RWE                                         */
29019 /* ------------------------------------------------------------------------- */
readLogwordExec(Signal * signal)29020 Uint32 Dblqh::readLogwordExec(Signal* signal)
29021 {
29022   Uint32 logPos = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
29023   ndbrequire(logPos < ZPAGE_SIZE);
29024   Uint32 logWord = logPagePtr.p->logPageWord[logPos];
29025   logPos++;
29026   logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPos;
29027   if (logPos >= ZPAGE_SIZE) {
29028     jam();
29029     logPagePtr.i = logPagePtr.p->logPageWord[ZNEXT_PAGE];
29030     if (logPagePtr.i != RNIL){
29031       ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
29032       logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = ZPAGE_HEADER_SIZE;
29033     } else {
29034       // Reading word at the last pos in the last page
29035       // Don't step forward to next page!
29036       jam();
29037       logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]++;
29038     }
29039   }//if
29040   return logWord;
29041 }//Dblqh::readLogwordExec()
29042 
29043 /* ------------------------------------------------------------------------- */
29044 /* -------               READ A SINGLE PAGE FROM THE LOG             ------- */
29045 /*                                                                           */
29046 /*       INPUT:          TRSP_PAGE_NO                                        */
29047 /*       SUBROUTINE SHORT NAME = RSP                                         */
29048 /* ------------------------------------------------------------------------- */
readSinglePage(Signal * signal,Uint32 pageNo)29049 void Dblqh::readSinglePage(Signal* signal, Uint32 pageNo)
29050 {
29051   seizeLfo(signal);
29052   initLfo(signal);
29053   seizeLogpage(signal);
29054   lfoPtr.p->firstLfoPage = logPagePtr.i;
29055   lfoPtr.p->lfoPageNo = pageNo;
29056   lfoPtr.p->noPagesRw = 1;
29057   signal->theData[0] = logFilePtr.p->fileRef;
29058   signal->theData[1] = cownref;
29059   signal->theData[2] = lfoPtr.i;
29060   signal->theData[3] = ZLIST_OF_PAIRS;
29061   signal->theData[4] = ZVAR_NO_LOG_PAGE_WORD;
29062   signal->theData[5] = 1;
29063   signal->theData[6] = logPagePtr.i;
29064   signal->theData[7] = pageNo;
29065   sendSignal(NDBFS_REF, GSN_FSREADREQ, signal, 8, JBA);
29066 
29067   if (DEBUG_REDO)
29068   {
29069     ndbout_c("readSinglePage 1 page at part: %u file: %u page: %u (mb: %u)",
29070              logPartPtr.p->logPartNo,
29071              logFilePtr.p->fileNo,
29072              pageNo,
29073              pageNo >> ZTWOLOG_NO_PAGES_IN_MBYTE);
29074   }
29075 }//Dblqh::readSinglePage()
29076 
29077 /* --------------------------------------------------------------------------
29078  * -------       REMOVE COPY FRAGMENT FROM ACTIVE COPY LIST           -------
29079  *
29080  * ------------------------------------------------------------------------- */
releaseActiveCopy(Signal * signal)29081 void Dblqh::releaseActiveCopy(Signal* signal)
29082 {
29083   UintR tracFlag;
29084   UintR tracIndex;
29085 
29086   tracFlag = ZFALSE;
29087   for (tracIndex = 0; tracIndex < 4; tracIndex++) {
29088     if (tracFlag == ZFALSE) {
29089       jam();
29090       if (cactiveCopy[tracIndex] == fragptr.i) {
29091         jam();
29092         tracFlag = ZTRUE;
29093       }//if
29094     } else {
29095       if (tracIndex < 3) {
29096         jam();
29097         cactiveCopy[tracIndex - 1] = cactiveCopy[tracIndex];
29098       } else {
29099         jam();
29100         cactiveCopy[3] = RNIL;
29101       }//if
29102     }//if
29103   }//for
29104   ndbrequire(tracFlag == ZTRUE);
29105   cnoActiveCopy--;
29106 }//Dblqh::releaseActiveCopy()
29107 
29108 
29109 /* --------------------------------------------------------------------------
29110  * -------       RELEASE ADD FRAGMENT RECORD                          -------
29111  *
29112  * ------------------------------------------------------------------------- */
releaseAddfragrec(Signal * signal)29113 void Dblqh::releaseAddfragrec(Signal* signal)
29114 {
29115   addfragptr.p->addfragStatus = AddFragRecord::FREE;
29116   addfragptr.p->nextAddfragrec = cfirstfreeAddfragrec;
29117   cfirstfreeAddfragrec = addfragptr.i;
29118 }//Dblqh::releaseAddfragrec()
29119 
29120 /* --------------------------------------------------------------------------
29121  * -------     RELEASE A PAGE REFERENCE RECORD.                       -------
29122  *
29123  * ------------------------------------------------------------------------- */
releasePageRef(Signal * signal)29124 void Dblqh::releasePageRef(Signal* signal)
29125 {
29126   pageRefPtr.p->prNext = cfirstfreePageRef;
29127   cfirstfreePageRef = pageRefPtr.i;
29128 }//Dblqh::releasePageRef()
29129 
29130 /* --------------------------------------------------------------------------
29131  * --- RELEASE ALL PAGES IN THE MM BUFFER AFTER EXECUTING THE LOG ON IT. ----
29132  *
29133  * ------------------------------------------------------------------------- */
releaseMmPages(Signal * signal)29134 void Dblqh::releaseMmPages(Signal* signal)
29135 {
29136 RMP_LOOP:
29137   jam();
29138   pageRefPtr.i = logPartPtr.p->firstPageRef;
29139   if (pageRefPtr.i != RNIL) {
29140     jam();
29141     ptrCheckGuard(pageRefPtr, cpageRefFileSize, pageRefRecord);
29142     releasePrPages(signal);
29143     removePageRef(signal);
29144     goto RMP_LOOP;
29145   }//if
29146 }//Dblqh::releaseMmPages()
29147 
29148 /* --------------------------------------------------------------------------
29149  * -------     RELEASE A SET OF PAGES AFTER EXECUTING THE LOG ON IT.  -------
29150  *
29151  * ------------------------------------------------------------------------- */
releasePrPages(Signal * signal)29152 void Dblqh::releasePrPages(Signal* signal)
29153 {
29154   UintR trppIndex;
29155 
29156   for (trppIndex = 0; trppIndex <= 7; trppIndex++) {
29157     jam();
29158     logPagePtr.i = pageRefPtr.p->pageRef[trppIndex];
29159     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
29160     releaseLogpage(signal);
29161   }//for
29162 }//Dblqh::releasePrPages()
29163 
29164 /* --------------------------------------------------------------------------
29165  * -------  REMOVE OPERATION RECORD FROM LIST ON LOG PART OF NOT      -------
29166  *               COMPLETED OPERATIONS IN THE LOG.
29167  *
29168  *       SUBROUTINE SHORT NAME = RLO
29169  * ------------------------------------------------------------------------- */
removeLogTcrec(Signal * signal,const TcConnectionrecPtr tcConnectptr)29170 void Dblqh::removeLogTcrec(Signal* signal,
29171                            const TcConnectionrecPtr tcConnectptr)
29172 {
29173   TcConnectionrecPtr rloTcNextConnectptr;
29174   TcConnectionrecPtr rloTcPrevConnectptr;
29175   rloTcPrevConnectptr.i = tcConnectptr.p->prevLogTcrec;
29176   rloTcNextConnectptr.i = tcConnectptr.p->nextLogTcrec;
29177   if (rloTcNextConnectptr.i != RNIL) {
29178     jam();
29179     ndbrequire(tcConnect_pool.getValidPtr(rloTcNextConnectptr));
29180     rloTcNextConnectptr.p->prevLogTcrec = rloTcPrevConnectptr.i;
29181   } else {
29182     jam();
29183     logPartPtr.p->lastLogTcrec = rloTcPrevConnectptr.i;
29184   }//if
29185   if (rloTcPrevConnectptr.i != RNIL) {
29186     jam();
29187     ndbrequire(tcConnect_pool.getValidPtr(rloTcPrevConnectptr));
29188     rloTcPrevConnectptr.p->nextLogTcrec = rloTcNextConnectptr.i;
29189   } else {
29190     jam();
29191     logPartPtr.p->firstLogTcrec = rloTcNextConnectptr.i;
29192   }//if
29193 }//Dblqh::removeLogTcrec()
29194 
29195 /* --------------------------------------------------------------------------
29196  * -------  REMOVE PAGE REFERENCE RECORD FROM LIST IN THIS LOG PART   -------
29197  *
29198  *       SUBROUTINE SHORT NAME = RPR
29199  * ------------------------------------------------------------------------- */
removePageRef(Signal * signal)29200 void Dblqh::removePageRef(Signal* signal)
29201 {
29202   PageRefRecordPtr rprPageRefPtr;
29203 
29204   pageRefPtr.i = logPartPtr.p->firstPageRef;
29205   if (pageRefPtr.i != RNIL) {
29206     jam();
29207     ptrCheckGuard(pageRefPtr, cpageRefFileSize, pageRefRecord);
29208     if (pageRefPtr.p->prNext == RNIL) {
29209       jam();
29210       logPartPtr.p->lastPageRef = RNIL;
29211       logPartPtr.p->firstPageRef = RNIL;
29212     } else {
29213       jam();
29214       logPartPtr.p->firstPageRef = pageRefPtr.p->prNext;
29215       rprPageRefPtr.i = pageRefPtr.p->prNext;
29216       ptrCheckGuard(rprPageRefPtr, cpageRefFileSize, pageRefRecord);
29217       rprPageRefPtr.p->prPrev = RNIL;
29218     }//if
29219     releasePageRef(signal);
29220   }//if
29221 }//Dblqh::removePageRef()
29222 
29223 /* ------------------------------------------------------------------------- */
29224 /* -------       RETURN FROM EXECUTION OF LOG                        ------- */
29225 /*                                                                           */
29226 /* ------------------------------------------------------------------------- */
returnExecLog(Signal * signal,const TcConnectionrecPtr tcConnectptr)29227 Uint32 Dblqh::returnExecLog(Signal* signal,
29228                             const TcConnectionrecPtr tcConnectptr)
29229 {
29230   tcConnectptr.p->connectState = TcConnectionrec::CONNECTED;
29231   initLogPointers(signal, tcConnectptr);
29232   logPartPtr.p->execSrExecuteIndex++;
29233   Uint32 result = checkIfExecLog(signal, tcConnectptr);
29234   if (result == ZOK) {
29235     jam();
29236 /* ------------------------------------------------------------------------- */
29237 /* THIS LOG RECORD WILL BE EXECUTED AGAIN TOWARDS ANOTHER NODE.              */
29238 /* ------------------------------------------------------------------------- */
29239     logPagePtr.i = logPartPtr.p->execSrLogPage;
29240     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
29241     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] =
29242                   logPartPtr.p->execSrLogPageIndex;
29243   } else {
29244     jam();
29245 /* ------------------------------------------------------------------------- */
29246 /*       NO MORE EXECUTION OF THIS LOG RECORD.                               */
29247 /* ------------------------------------------------------------------------- */
29248     if (logPartPtr.p->logExecState ==
29249 	LogPartRecord::LES_EXEC_LOGREC_FROM_FILE) {
29250       jam();
29251 /* ------------------------------------------------------------------------- */
29252 /* THE LOG RECORD WAS READ FROM DISK. RELEASE ITS PAGES IMMEDIATELY.         */
29253 /* ------------------------------------------------------------------------- */
29254       lfoPtr.i = logPartPtr.p->execSrLfoRec;
29255       ptrCheckGuard(lfoPtr, clfoFileSize, logFileOperationRecord);
29256       releaseLfoPages(signal);
29257       releaseLfo(signal);
29258       logPartPtr.p->logExecState = LogPartRecord::LES_EXEC_LOG;
29259       if (logPartPtr.p->execSrExecLogFile != logPartPtr.p->currentLogfile) {
29260         jam();
29261         LogFileRecordPtr clfLogFilePtr;
29262         clfLogFilePtr.i = logPartPtr.p->execSrExecLogFile;
29263         ptrCheckGuard(clfLogFilePtr, clogFileFileSize, logFileRecord);
29264 #ifndef NO_REDO_OPEN_FILE_CACHE
29265         closeFile_cache(signal, clfLogFilePtr, __LINE__);
29266 #else
29267         clfLogFilePtr.p->logFileStatus = LogFileRecord::CLOSING_EXEC_LOG;
29268         closeFile(signal, clfLogFilePtr, __LINE__);
29269 #endif
29270         result = ZCLOSE_FILE;
29271       }//if
29272     }//if
29273     logPartPtr.p->execSrExecuteIndex = 0;
29274     logPartPtr.p->execSrLogPage = RNIL;
29275     logPartPtr.p->execSrLogPageIndex = ZNIL;
29276     logPagePtr.i = logFilePtr.p->currentLogpage;
29277     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
29278     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPartPtr.p->savePageIndex;
29279   }//if
29280   return result;
29281 }//Dblqh::returnExecLog()
29282 
29283 /* --------------------------------------------------------------------------
29284  * -------       SEIZE ADD FRAGMENT RECORD                             ------
29285  *
29286  * ------------------------------------------------------------------------- */
seizeAddfragrec(Signal * signal)29287 void Dblqh::seizeAddfragrec(Signal* signal)
29288 {
29289   addfragptr.i = cfirstfreeAddfragrec;
29290   ptrCheckGuard(addfragptr, caddfragrecFileSize, addFragRecord);
29291   cfirstfreeAddfragrec = addfragptr.p->nextAddfragrec;
29292 
29293   addfragptr.p->accConnectptr = RNIL;
29294   addfragptr.p->tupConnectptr = RNIL;
29295   addfragptr.p->tuxConnectptr = RNIL;
29296   addfragptr.p->defValSectionI = RNIL;
29297   addfragptr.p->defValNextPos = 0;
29298   bzero(&addfragptr.p->m_createTabReq, sizeof(addfragptr.p->m_createTabReq));
29299   bzero(&addfragptr.p->m_lqhFragReq, sizeof(addfragptr.p->m_lqhFragReq));
29300   bzero(&addfragptr.p->m_addAttrReq, sizeof(addfragptr.p->m_addAttrReq));
29301   bzero(&addfragptr.p->m_dropFragReq, sizeof(addfragptr.p->m_dropFragReq));
29302   bzero(&addfragptr.p->m_dropTabReq, sizeof(addfragptr.p->m_dropTabReq));
29303   addfragptr.p->addfragErrorCode = 0;
29304   addfragptr.p->attrSentToTup = 0;
29305   addfragptr.p->attrReceived = 0;
29306   addfragptr.p->totalAttrReceived = 0;
29307 }//Dblqh::seizeAddfragrec()
29308 
29309 /* --------------------------------------------------------------------------
29310  * -------       SEIZE FRAGMENT RECORD                                -------
29311  *
29312  * ------------------------------------------------------------------------- */
29313 /* ------------------------------------------------------------------------- */
29314 /* -------     SEIZE A PAGE REFERENCE RECORD.                        ------- */
29315 /*                                                                           */
29316 /* ------------------------------------------------------------------------- */
seizePageRef(Signal * signal)29317 void Dblqh::seizePageRef(Signal* signal)
29318 {
29319   pageRefPtr.i = cfirstfreePageRef;
29320   ptrCheckGuard(pageRefPtr, cpageRefFileSize, pageRefRecord);
29321   cfirstfreePageRef = pageRefPtr.p->prNext;
29322   pageRefPtr.p->prNext = RNIL;
29323 }//Dblqh::seizePageRef()
29324 
29325 /* --------------------------------------------------------------------------
29326  * -------               SEND ABORTED                                 -------
29327  *
29328  * ------------------------------------------------------------------------- */
sendAborted(Signal * signal,const TcConnectionrecPtr tcConnectptr)29329 void Dblqh::sendAborted(Signal* signal,
29330                         const TcConnectionrecPtr tcConnectptr)
29331 {
29332   UintR TlastInd;
29333   if (tcConnectptr.p->nextReplica == ZNIL) {
29334     TlastInd = ZTRUE;
29335   } else {
29336     TlastInd = ZFALSE;
29337   }//if
29338   signal->theData[0] = tcConnectptr.p->tcOprec;
29339   signal->theData[1] = tcConnectptr.p->transid[0];
29340   signal->theData[2] = tcConnectptr.p->transid[1];
29341   signal->theData[3] = cownNodeid;
29342   signal->theData[4] = TlastInd;
29343   sendSignal(tcConnectptr.p->tcBlockref, GSN_ABORTED, signal, 5, JBB);
29344   return;
29345 }//Dblqh::sendAborted()
29346 
29347 /* --------------------------------------------------------------------------
29348  * -------               SEND LQH_TRANSCONF                           -------
29349  *
29350  * ------------------------------------------------------------------------- */
sendLqhTransconf(Signal * signal,LqhTransConf::OperationStatus stat,const TcConnectionrecPtr tcConnectptr)29351 void Dblqh::sendLqhTransconf(Signal* signal,
29352                              LqhTransConf::OperationStatus stat,
29353                              const TcConnectionrecPtr tcConnectptr)
29354 {
29355   TcNodeFailRecordPtr tcNodeFailPtr;
29356   tcNodeFailPtr.i = tcConnectptr.p->tcNodeFailrec;
29357   ptrCheckGuard(tcNodeFailPtr, ctcNodeFailrecFileSize, tcNodeFailRecord);
29358 
29359   Uint32 reqInfo = 0;
29360   LqhTransConf::setReplicaType(reqInfo, tcConnectptr.p->replicaType);
29361   LqhTransConf::setReplicaNo(reqInfo, tcConnectptr.p->seqNoReplica);
29362   LqhTransConf::setLastReplicaNo(reqInfo, tcConnectptr.p->lastReplicaNo);
29363   LqhTransConf::setSimpleFlag(reqInfo, tcConnectptr.p->opSimple);
29364   LqhTransConf::setDirtyFlag(reqInfo, tcConnectptr.p->dirtyOp);
29365   LqhTransConf::setOperation(reqInfo, tcConnectptr.p->operation);
29366 
29367   LqhTransConf * const lqhTransConf = (LqhTransConf *)&signal->theData[0];
29368   lqhTransConf->tcRef           = tcNodeFailPtr.p->newTcRef;
29369   lqhTransConf->lqhNodeId       = cownNodeid;
29370   lqhTransConf->operationStatus = stat;
29371   lqhTransConf->lqhConnectPtr   = tcConnectptr.i;
29372   lqhTransConf->transId1        = tcConnectptr.p->transid[0];
29373   lqhTransConf->transId2        = tcConnectptr.p->transid[1];
29374   lqhTransConf->oldTcOpRec      = tcConnectptr.p->tcOprec;
29375   lqhTransConf->requestInfo     = reqInfo;
29376   lqhTransConf->gci_hi          = tcConnectptr.p->gci_hi;
29377   lqhTransConf->nextNodeId1     = tcConnectptr.p->nextReplica;
29378   lqhTransConf->nextNodeId2     = tcConnectptr.p->nodeAfterNext[0];
29379   lqhTransConf->nextNodeId3     = tcConnectptr.p->nodeAfterNext[1];
29380   lqhTransConf->apiRef          = tcConnectptr.p->applRef;
29381   lqhTransConf->apiOpRec        = tcConnectptr.p->applOprec;
29382   lqhTransConf->tableId         = tcConnectptr.p->tableref;
29383   lqhTransConf->gci_lo          = tcConnectptr.p->gci_lo;
29384   lqhTransConf->fragId          = tcConnectptr.p->fragmentid;
29385   /**
29386     maxInstanceId is ignored for all LQH_TRANSCONF except the last one sent with
29387     LqhTransConf::LastTransConf as the state. This state is never called in this
29388     function. We set the value to the TC instance that handled this transaction.
29389     It's not needed but better set it to something useful than to something
29390     not useful.
29391   */
29392   ndbassert(stat != LqhTransConf::LastTransConf);
29393   lqhTransConf->maxInstanceId = refToInstance(tcConnectptr.p->tcBlockref);
29394   sendSignal(tcNodeFailPtr.p->newTcBlockref, GSN_LQH_TRANSCONF,
29395 	     signal, LqhTransConf::SignalLength, JBB);
29396   tcNodeFailPtr.p->tcRecNow = tcConnectptr.i + 1;
29397   signal->theData[0] = ZLQH_TRANS_NEXT;
29398   signal->theData[1] = tcNodeFailPtr.i;
29399   sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
29400 
29401   if (0)
29402   {
29403     ndbout_c("sending LQH_TRANSCONF %u transid: H'%.8x, H'%.8x op: %u state: %u(%u) marker: %u",
29404              tcConnectptr.i,
29405              tcConnectptr.p->transid[0],
29406              tcConnectptr.p->transid[1],
29407              tcConnectptr.p->operation,
29408              tcConnectptr.p->transactionState,
29409              stat,
29410              tcConnectptr.p->commitAckMarker);
29411   }
29412 }//Dblqh::sendLqhTransconf()
29413 
29414 /* --------------------------------------------------------------------------
29415  * -------               START ANOTHER PHASE OF LOG EXECUTION         -------
29416  *       RESET THE VARIABLES NEEDED BY THIS PROCESS AND SEND THE START SIGNAL
29417  *
29418  * ------------------------------------------------------------------------- */
/**
 * Start a phase of log execution by sending START_EXEC_SR to this block.
 * The signal carries the i-value that c_lcp_complete_fragments.first()
 * leaves in the block-global fragptr.
 */
void Dblqh::startExecSr(Signal* signal)
{
  // Position fragptr at the start of the c_lcp_complete_fragments list.
  c_lcp_complete_fragments.first(fragptr);
  // One word of signal data: the fragment record to begin with.
  signal->theData[0] = fragptr.i;
  sendSignal(cownref, GSN_START_EXEC_SR, signal, 1, JBB);
}//Dblqh::startExecSr()
29425 
29426 /* ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
29427  * ¤¤¤¤¤¤¤                            LOG MODULE                      ¤¤¤¤¤¤¤
29428  * ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤ */
29429 /* --------------------------------------------------------------------------
29430  * -------       STEP FORWARD IN FRAGMENT LOG DURING LOG EXECUTION    -------
29431  *
29432  * ------------------------------------------------------------------------- */
stepAhead(Signal * signal,Uint32 stepAheadWords)29433 void Dblqh::stepAhead(Signal* signal, Uint32 stepAheadWords)
29434 {
29435   UintR tsaPos;
29436 
29437   tsaPos = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
29438   while ((stepAheadWords + tsaPos) >= ZPAGE_SIZE) {
29439     jam();
29440     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = ZPAGE_SIZE;
29441     stepAheadWords = stepAheadWords - (ZPAGE_SIZE - tsaPos);
29442     logFilePtr.p->currentLogpage = logPagePtr.p->logPageWord[ZNEXT_PAGE];
29443     logPagePtr.i = logPagePtr.p->logPageWord[ZNEXT_PAGE];
29444     logFilePtr.p->currentFilepage++;
29445     ptrCheckGuardErr(logPagePtr, clogPageFileSize, logPageRecord,
29446                      NDBD_EXIT_SR_REDOLOG);
29447     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = ZPAGE_HEADER_SIZE;
29448     logPartPtr.p->execSrPagesRead--;
29449     logPartPtr.p->execSrPagesExecuted++;
29450     tsaPos = ZPAGE_HEADER_SIZE;
29451   }//while
29452   logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = stepAheadWords + tsaPos;
29453 }//Dblqh::stepAhead()
29454 
29455 /* --------------------------------------------------------------------------
 * -------               WRITE AN ABORT LOG RECORD                    -------
29457  *
29458  *       SUBROUTINE SHORT NAME: WAL
29459  * ------------------------------------------------------------------------- */
writeAbortLog(Signal * signal,TcConnectionrec * regTcPtr,LogPartRecord * regLogPartPtr)29460 void Dblqh::writeAbortLog(Signal* signal,
29461                           TcConnectionrec* regTcPtr,
29462                           LogPartRecord *regLogPartPtr)
29463 {
29464   if ((ZABORT_LOG_SIZE + ZNEXT_LOG_SIZE) >
29465       logFilePtr.p->remainingWordsInMbyte) {
29466     jam();
29467     changeMbyte(signal);
29468   }//if
29469   regLogPartPtr->m_total_written_words += ZABORT_LOG_SIZE;
29470   logFilePtr.p->remainingWordsInMbyte =
29471     logFilePtr.p->remainingWordsInMbyte - ZABORT_LOG_SIZE;
29472   writeLogWord(signal, ZABORT_TYPE);
29473   writeLogWord(signal, regTcPtr->transid[0]);
29474   writeLogWord(signal, regTcPtr->transid[1]);
29475   decrement_committed_mbytes(regLogPartPtr,
29476                              regTcPtr);
29477 }//Dblqh::writeAbortLog()
29478 
29479 /* --------------------------------------------------------------------------
29480  * -------               WRITE A COMMIT LOG RECORD                    -------
29481  *
29482  *       SUBROUTINE SHORT NAME: WCL
29483  * ------------------------------------------------------------------------- */
writeCommitLog(Signal * signal,LogPartRecordPtr regLogPartPtr,TcConnectionrec * regTcPtr)29484 void Dblqh::writeCommitLog(Signal* signal,
29485                            LogPartRecordPtr regLogPartPtr,
29486                            TcConnectionrec* regTcPtr)
29487 {
29488   LogFileRecordPtr regLogFilePtr;
29489   LogPageRecordPtr regLogPagePtr;
29490   regLogFilePtr.i = regLogPartPtr.p->currentLogfile;
29491   ptrCheckGuard(regLogFilePtr, clogFileFileSize, logFileRecord);
29492   regLogPagePtr.i = regLogFilePtr.p->currentLogpage;
29493   Uint32 twclTmp = regLogFilePtr.p->remainingWordsInMbyte;
29494   ptrCheckGuard(regLogPagePtr, clogPageFileSize, logPageRecord);
29495   logPartPtr = regLogPartPtr;
29496   logFilePtr = regLogFilePtr;
29497   logPagePtr = regLogPagePtr;
29498   if ((ZCOMMIT_LOG_SIZE + ZNEXT_LOG_SIZE) > twclTmp) {
29499     jam();
29500     changeMbyte(signal);
29501     twclTmp = logFilePtr.p->remainingWordsInMbyte;
29502   }//if
29503 
29504   Uint32 twclLogPos = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
29505   Uint32 tableId = regTcPtr->tableref;
29506   Uint32 schemaVersion = regTcPtr->schemaVersion;
29507   Uint32 fragId = regTcPtr->fragmentid;
29508   Uint32 fileNo = regTcPtr->logStartFileNo;
29509   Uint32 startPageNo = regTcPtr->logStartPageNo;
29510   Uint32 pageIndex = regTcPtr->logStartPageIndex;
29511   Uint32 stopPageNo = regTcPtr->logStopPageNo;
29512   Uint32 gci = regTcPtr->gci_hi;
29513   regLogPartPtr.p->m_total_written_words += ZCOMMIT_LOG_SIZE;
29514   logFilePtr.p->remainingWordsInMbyte = twclTmp - ZCOMMIT_LOG_SIZE;
29515 
29516   if ((twclLogPos + ZCOMMIT_LOG_SIZE) >= ZPAGE_SIZE) {
29517     writeLogWord(signal, ZCOMMIT_TYPE);
29518     writeLogWord(signal, tableId);
29519     writeLogWord(signal, schemaVersion);
29520     writeLogWord(signal, fragId);
29521     writeLogWord(signal, fileNo);
29522     writeLogWord(signal, startPageNo);
29523     writeLogWord(signal, pageIndex);
29524     writeLogWord(signal, stopPageNo);
29525     writeLogWord(signal, gci);
29526   } else {
29527     Uint32* dataPtr = &logPagePtr.p->logPageWord[twclLogPos];
29528     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = twclLogPos + ZCOMMIT_LOG_SIZE;
29529     dataPtr[0] = ZCOMMIT_TYPE;
29530     dataPtr[1] = tableId;
29531     dataPtr[2] = schemaVersion;
29532     dataPtr[3] = fragId;
29533     dataPtr[4] = fileNo;
29534     dataPtr[5] = startPageNo;
29535     dataPtr[6] = pageIndex;
29536     dataPtr[7] = stopPageNo;
29537     dataPtr[8] = gci;
29538   }//if
29539   decrement_committed_mbytes(regLogPartPtr.p,
29540                              regTcPtr);
29541   TcConnectionrecPtr rloTcNextConnectptr;
29542   TcConnectionrecPtr rloTcPrevConnectptr;
29543   rloTcPrevConnectptr.i = regTcPtr->prevLogTcrec;
29544   rloTcNextConnectptr.i = regTcPtr->nextLogTcrec;
29545   if (rloTcNextConnectptr.i != RNIL) {
29546     jam();
29547     ndbrequire(tcConnect_pool.getValidPtr(rloTcNextConnectptr));
29548     rloTcNextConnectptr.p->prevLogTcrec = rloTcPrevConnectptr.i;
29549   } else {
29550     regLogPartPtr.p->lastLogTcrec = rloTcPrevConnectptr.i;
29551   }//if
29552   if (rloTcPrevConnectptr.i != RNIL) {
29553     jam();
29554     ndbrequire(tcConnect_pool.getValidPtr(rloTcPrevConnectptr));
29555     rloTcPrevConnectptr.p->nextLogTcrec = rloTcNextConnectptr.i;
29556   } else {
29557     regLogPartPtr.p->firstLogTcrec = rloTcNextConnectptr.i;
29558   }//if
29559 }//Dblqh::writeCommitLog()
29560 
29561 /* --------------------------------------------------------------------------
29562  * -------               WRITE A COMPLETED GCI LOG RECORD             -------
29563  *
29564  *       SUBROUTINE SHORT NAME: WCG
29565 // Input Pointers:
29566 // logFilePtr
29567 // logPartPtr
29568  * ------------------------------------------------------------------------- */
writeCompletedGciLog(Signal * signal)29569 void Dblqh::writeCompletedGciLog(Signal* signal)
29570 {
29571   if ((ZCOMPLETED_GCI_LOG_SIZE + ZNEXT_LOG_SIZE) >
29572       logFilePtr.p->remainingWordsInMbyte) {
29573     jam();
29574     changeMbyte(signal);
29575   }//if
29576 
29577   if (ERROR_INSERTED(5051) && (logFilePtr.p->currentFilepage > 0) &&
29578       (logFilePtr.p->currentFilepage % 32) == 0)
29579   {
29580     SET_ERROR_INSERT_VALUE(5000);
29581   }
29582 
29583   logFilePtr.p->remainingWordsInMbyte =
29584     logFilePtr.p->remainingWordsInMbyte - ZCOMPLETED_GCI_LOG_SIZE;
29585 
29586   if (DEBUG_REDO)
29587   {
29588     ndbout_c("writeCompletedGciLog gci: %u part: %u file: %u page: %u (mb: %u)",
29589              cnewestCompletedGci,
29590              logPartPtr.p->logPartNo,
29591              logFilePtr.p->fileNo,
29592              logFilePtr.p->currentFilepage,
29593              logFilePtr.p->currentFilepage >> ZTWOLOG_NO_PAGES_IN_MBYTE);
29594   }
29595 
29596   writeLogWord(signal, ZCOMPLETED_GCI_TYPE);
29597   writeLogWord(signal, cnewestCompletedGci);
29598   logPartPtr.p->m_total_written_words += ZCOMPLETED_GCI_LOG_SIZE;
29599   logPartPtr.p->logPartNewestCompletedGCI = cnewestCompletedGci;
29600 }//Dblqh::writeCompletedGciLog()
29601 
29602 /* --------------------------------------------------------------------------
29603  * -------         WRITE A DIRTY PAGE DURING LOG EXECUTION            -------
29604  *
29605  *     SUBROUTINE SHORT NAME: WD
29606  * ------------------------------------------------------------------------- */
writeDirty(Signal * signal,Uint32 place)29607 void Dblqh::writeDirty(Signal* signal, Uint32 place)
29608 {
29609   logPagePtr.p->logPageWord[ZPOS_DIRTY] = ZNOT_DIRTY;
29610 
29611   ndbassert(logPartPtr.p->prevFilepage ==
29612             logPagePtr.p->logPageWord[ZPOS_PAGE_NO]);
29613   writeDbgInfoPageHeader(logPagePtr, place, logPartPtr.p->prevFilepage,
29614                          ZPAGE_SIZE);
29615   // Calculate checksum for page
29616   logPagePtr.p->logPageWord[ZPOS_CHECKSUM] = calcPageCheckSum(logPagePtr);
29617 
29618   seizeLfo(signal);
29619   initLfo(signal);
29620   lfoPtr.p->lfoPageNo = logPartPtr.p->prevFilepage;
29621   lfoPtr.p->noPagesRw = 1;
29622   lfoPtr.p->lfoState = LogFileOperationRecord::WRITE_DIRTY;
29623   lfoPtr.p->firstLfoPage = logPagePtr.i;
29624   signal->theData[0] = logFilePtr.p->fileRef;
29625   signal->theData[1] = cownref;
29626   signal->theData[2] = lfoPtr.i;
29627   signal->theData[3] = ZLIST_OF_PAIRS_SYNCH;
29628   signal->theData[4] = ZVAR_NO_LOG_PAGE_WORD;
29629   signal->theData[5] = 1;
29630   signal->theData[6] = logPagePtr.i;
29631   signal->theData[7] = logPartPtr.p->prevFilepage;
29632   sendSignal(NDBFS_REF, GSN_FSWRITEREQ, signal, 8, JBA);
29633 
29634   ndbrequire(logFilePtr.p->fileRef != RNIL);
29635 
29636   logPartPtr.p->m_io_tracker.send_io(32768);
29637 
29638   if (DEBUG_REDO)
29639   {
29640     ndbout_c("writeDirty 1 page at part: %u file: %u page: %u (mb: %u)",
29641              logPartPtr.p->logPartNo,
29642              logFilePtr.p->fileNo,
29643              logPartPtr.p->prevFilepage,
29644              logPartPtr.p->prevFilepage >> ZTWOLOG_NO_PAGES_IN_MBYTE);
29645   }
29646 }//Dblqh::writeDirty()
29647 
29648 /* --------------------------------------------------------------------------
29649  * -------          WRITE A WORD INTO THE LOG, CHECK FOR NEW PAGE     -------
29650  *
29651  *       SUBROUTINE SHORT NAME:  WLW
29652  * ------------------------------------------------------------------------- */
writeLogWord(Signal * signal,Uint32 data)29653 void Dblqh::writeLogWord(Signal* signal, Uint32 data)
29654 {
29655   Uint32 logPos = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
29656   ndbrequire(logPos < ZPAGE_SIZE);
29657   logPagePtr.p->logPageWord[logPos] = data;
29658   logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPos + 1;
29659   if ((logPos + 1) == ZPAGE_SIZE) {
29660     jam();
29661     completedLogPage(signal, ZNORMAL, __LINE__);
29662     seizeLogpage(signal);
29663     initLogpage(signal);
29664     logFilePtr.p->currentLogpage = logPagePtr.i;
29665     logFilePtr.p->currentFilepage++;
29666   }//if
29667 }//Dblqh::writeLogWord()
29668 
29669 /* --------------------------------------------------------------------------
29670  * -------   WRITE MULTIPLE WORDS INTO THE LOG, CHECK FOR NEW PAGES   -------
29671  *
29672  * ------------------------------------------------------------------------- */
29673 
writeLogWords(Signal * signal,const Uint32 * data,Uint32 len)29674 void Dblqh::writeLogWords(Signal* signal, const Uint32* data, Uint32 len)
29675 {
29676   Uint32 logPos = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
29677   ndbrequire(logPos < ZPAGE_SIZE);
29678   Uint32 wordsThisPage= ZPAGE_SIZE - logPos;
29679 
29680   while (len >= wordsThisPage)
29681   {
29682     /* Fill rest of the log page */
29683     MEMCOPY_NO_WORDS(&logPagePtr.p->logPageWord[logPos],
29684                      data,
29685                      wordsThisPage);
29686     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = ZPAGE_SIZE;
29687     data+= wordsThisPage;
29688     len-= wordsThisPage;
29689 
29690     /* Mark page completed and get a new one */
29691     jam();
29692     completedLogPage(signal, ZNORMAL, __LINE__);
29693     seizeLogpage(signal);
29694     initLogpage(signal);
29695     logFilePtr.p->currentLogpage = logPagePtr.i;
29696     logFilePtr.p->currentFilepage++;
29697 
29698     logPos = logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX];
29699     ndbrequire(logPos < ZPAGE_SIZE);
29700     wordsThisPage= ZPAGE_SIZE - logPos;
29701   }
29702 
29703   if (len > 0)
29704   {
29705     /* No need to worry about next page */
29706     ndbassert( len < wordsThisPage );
29707     /* Write partial log page */
29708     MEMCOPY_NO_WORDS(&logPagePtr.p->logPageWord[logPos],
29709                      data,
29710                      len);
29711     logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] = logPos + len;
29712   }
29713 
29714   ndbassert( logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] < ZPAGE_SIZE );
29715 }
29716 
29717 /* --------------------------------------------------------------------------
29718  * -------         WRITE A NEXT LOG RECORD AND CHANGE TO NEXT MBYTE   -------
29719  *
29720  *       SUBROUTINE SHORT NAME:  WNL
29721  * Input Pointers:
29722  * logFilePtr(Redefines)
29723  * logPagePtr (Redefines)
29724  * logPartPtr
29725  * When changing to a new MByte we always ensure that we sync the REDO log.
29726  * This more or less annuls the sync once per 1 MByte maintained by NDBFS,
29727  * it does however make it easier to reason around recovery and also makes
29728  * it less likely of future bugs due to changes in NDBFS. This is why we
29729  * use true on completedLogPage in this function.
29730  * ------------------------------------------------------------------------- */
/**
 * Terminate the current MByte of the REDO log part (logPartPtr) and move
 * the write position to the next MByte, changing to the next log file when
 * the current MByte is the last one of the file.
 *
 * Steps performed:
 *  - a ZNEXT_MBYTE_TYPE word is stored at the current page index and the
 *    page is handed to completedLogPage() with the last argument true,
 *  - currentMbyte, filePosition, currentFilepage and remainingWordsInMbyte
 *    of the (possibly new) log file are set for the new MByte,
 *  - a new log page is seized and initialised; the page zero of the file is
 *    used instead when MByte 0 of file 0 is started,
 *  - when MByte 0 of a file is started the file header is written and the
 *    next file is opened, or P_FILE_CHANGE_PROBLEM is set when the file is
 *    not open yet,
 *  - progError() is called when the new head position equals the tail,
 *  - the free space of the log part is calculated and can lead to a call of
 *    force_lcp() and/or to P_TAIL_PROBLEM being set,
 *  - error inserts 5058 and 5059 are handled.
 *
 * Redefines logFilePtr and logPagePtr.
 */
void Dblqh::writeNextLog(Signal* signal)
{
  LogFileRecordPtr wnlNextLogFilePtr;
  UintR twnlNextFileNo;
  UintR twnlNewMbyte;
  UintR twnlRemWords;
  UintR twnlNextMbyte;

/* -------------------------------------------------- */
/*       CALCULATE THE NEW NUMBER OF REMAINING WORDS  */
/*       AS 128*2036 WHERE 128 * 8 KBYTE = 1 MBYTE    */
/*       AND 2036 IS THE NUMBER OF WORDS IN A PAGE    */
/*       THAT IS USED FOR LOG INFORMATION.            */
/* -------------------------------------------------- */
  // NOTE(review): the figures 128 and 2036 above look historical. The code
  // computes (ZPAGE_SIZE - ZPAGE_HEADER_SIZE) * ZPAGES_IN_MBYTE; confirm
  // the figures against the current definitions of these constants.
  twnlRemWords = ZPAGE_SIZE - ZPAGE_HEADER_SIZE;
  twnlRemWords = twnlRemWords * ZPAGES_IN_MBYTE;
  wnlNextLogFilePtr.i = logFilePtr.p->nextLogFile;
  ptrCheckGuard(wnlNextLogFilePtr, clogFileFileSize, logFileRecord);
/* -------------------------------------------------- */
/*       WRITE THE NEXT LOG RECORD.                   */
/* -------------------------------------------------- */
  ndbrequire(logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX] < ZPAGE_SIZE);
  logPagePtr.p->logPageWord[logPagePtr.p->logPageWord[ZCURR_PAGE_INDEX]] =
    ZNEXT_MBYTE_TYPE;
  if (logFilePtr.p->currentMbyte == (clogFileSize - 1)) {
    jam();
/* -------------------------------------------------- */
/*       CALCULATE THE NEW REMAINING WORDS WHEN       */
/*       CHANGING LOG FILE IS PERFORMED               */
/* -------------------------------------------------- */
    // One page less is available in the first MByte of a file.
    twnlRemWords = twnlRemWords - (ZPAGE_SIZE - ZPAGE_HEADER_SIZE);
/* -------------------------------------------------- */
/*       ENSURE THAT THE LOG PAGES ARE WRITTEN AFTER  */
/*       WE HAVE CHANGED MBYTE.                       */
/* -------------------------------------------------- */
/*       ENSURE LAST PAGE IN PREVIOUS MBYTE IS        */
/*       WRITTEN AND THAT THE STATE OF THE WRITE IS   */
/*       PROPERLY SET.                                */
/* -------------------------------------------------- */
/*       WE HAVE TO CHANGE LOG FILE                   */
/* -------------------------------------------------- */
    completedLogPage(signal, ZLAST_WRITE_IN_FILE, __LINE__, true);
    if (wnlNextLogFilePtr.p->fileNo == 0) {
      jam();
/* -------------------------------------------------- */
/*       WE HAVE FINALISED A LOG LAP, START FROM LOG  */
/*       FILE 0 AGAIN                                 */
/* -------------------------------------------------- */
      logPartPtr.p->logLap++;
    }//if
    // From here on logFilePtr refers to the next log file.
    logPartPtr.p->currentLogfile = wnlNextLogFilePtr.i;
    logFilePtr.i = wnlNextLogFilePtr.i;
    logFilePtr.p = wnlNextLogFilePtr.p;
    twnlNewMbyte = 0;
  } else {
    jam();
/* -------------------------------------------------- */
/*       INCREMENT THE CURRENT MBYTE                  */
/*       SET PAGE INDEX TO PAGE HEADER SIZE           */
/* -------------------------------------------------- */
    completedLogPage(signal, ZENFORCE_WRITE, __LINE__, true);
    twnlNewMbyte = logFilePtr.p->currentMbyte + 1;
  }//if
/* -------------------------------------------------- */
/*       CHANGE TO NEW LOG FILE IF NECESSARY          */
/*       UPDATE THE FILE POSITION TO THE NEW MBYTE    */
/*       FOUND IN PAGE PART OF TNEXT_LOG_PTR          */
/*       ALLOCATE AND INITIATE A NEW PAGE SINCE WE    */
/*       HAVE SENT THE PREVIOUS PAGE TO DISK.         */
/*       SET THE NEW NUMBER OF REMAINING WORDS IN THE */
/*       NEW MBYTE ALLOCATED.                         */
/* -------------------------------------------------- */
  logFilePtr.p->currentMbyte = twnlNewMbyte;
  logFilePtr.p->filePosition = twnlNewMbyte * ZPAGES_IN_MBYTE;
  logFilePtr.p->currentFilepage = twnlNewMbyte * ZPAGES_IN_MBYTE;
  logFilePtr.p->remainingWordsInMbyte = twnlRemWords;
  seizeLogpage(signal);
  if (logFilePtr.p->currentMbyte == 0) {
    jam();
    logFilePtr.p->lastPageWritten = 0;
    if (logFilePtr.p->fileNo == 0) {
      jam();
      // For file 0 the page just seized is released again and the page
      // zero kept in the log file record is used instead.
      releaseLogpage(signal);
      logPagePtr.i = logFilePtr.p->logPageZero;
      ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
    }//if
  }//if
  initLogpage(signal);
  logFilePtr.p->currentLogpage = logPagePtr.i;
  if (logFilePtr.p->currentMbyte == 0) {
    jam();
/* -------------------------------------------------- */
/*       THIS IS A NEW FILE, WRITE THE FILE DESCRIPTOR*/
/*       ALSO OPEN THE NEXT LOG FILE TO ENSURE THAT   */
/*       THIS FILE IS OPEN WHEN ITS TURN COMES.       */
/* -------------------------------------------------- */
#ifdef ERROR_INSERT
    if (delayOpenFilePtrI > 0 &&  logFilePtr.i == delayOpenFilePtrI)
    {
      // Error insertion (the required file is not opened) is seen.
      ndbassert(logFilePtr.p->fileRef == RNIL);
      // Clear the inserted error 5090.
      delayOpenFilePtrI = 0;
    }
#endif
   if (logFilePtr.p->fileRef == RNIL)
    {
      jam();
      // The new file is not open yet. Writing is held back by setting the
      // file change problem on the log part.
      logFilePtr.p->fileChangeState = LogFileRecord::WAIT_FOR_OPEN_NEXT_FILE;
      update_log_problem(signal, logPartPtr,
                         LogPartRecord::P_FILE_CHANGE_PROBLEM,
                         /* set */ true);
      // This problem will be cleared by writePageZeroLab() when the file
      // is opened and the zero page is written.
    }
    else
    {
      writeFileHeaderOpen(signal, ZNORMAL);
      openNextLogfile(signal);
      logFilePtr.p->fileChangeState = LogFileRecord::BOTH_WRITES_ONGOING;
    }
  }//if
  if (logFilePtr.p->fileNo == logPartPtr.p->logTailFileNo)
  {
    if (logFilePtr.p->currentMbyte == logPartPtr.p->logTailMbyte)
    {
      jam();
/* -------------------------------------------------- */
/*       THE HEAD AND TAIL HAS MET. THIS SHOULD NEVER */
/*       OCCUR. CAN HAPPEN IF THE LOCAL CHECKPOINTS   */
/*       TAKE FAR TOO LONG TIME. SO TIMING PROBLEMS   */
/*       CAN INVOKE THIS SYSTEM CRASH. HOWEVER ONLY   */
/*       VERY SERIOUS TIMING PROBLEMS.                */
/* -------------------------------------------------- */
      char buf[100];
      BaseString::snprintf(buf, sizeof(buf),
                           "Head/Tail met in REDO log, logpart: %u"
                           " file: %u mbyte: %u state: %u log-problem: %u",
                           logPartPtr.p->logPartNo,
                           logFilePtr.p->fileNo,
                           logFilePtr.p->currentMbyte,
                           logPartPtr.p->logPartState,
                           logPartPtr.p->m_log_problems);


      // Run dump code 2398 before the node is stopped.
      signal->theData[0] = 2398;
      execDUMP_STATE_ORD(signal);
      // NOTE(review): progError() presumably does not return, which would
      // make the systemError() call below unreachable - confirm.
      progError(__LINE__, NDBD_EXIT_NO_MORE_REDOLOG, buf);
      systemError(signal, __LINE__);
    }//if
  }//if
  // Calculate the position (file number, MByte) that follows the MByte
  // just started. It is used as head in the free space calculation below.
  if (logFilePtr.p->currentMbyte == (clogFileSize - 1)) {
    jam();
    twnlNextMbyte = 0;
    if (logFilePtr.p->fileChangeState != LogFileRecord::NOT_ONGOING)
    {
      jam();
      // The previous file change is still ongoing when the last MByte of
      // this file is reached, so the file change problem is set.
      update_log_problem(signal, logPartPtr,
                         LogPartRecord::P_FILE_CHANGE_PROBLEM,
                         /* set */ true);
    }//if
    twnlNextFileNo = wnlNextLogFilePtr.p->fileNo;
  } else {
    jam();
    twnlNextMbyte = logFilePtr.p->currentMbyte + 1;
    twnlNextFileNo = logFilePtr.p->fileNo;
  }//if

  LogPosition head = { twnlNextFileNo, twnlNextMbyte };
  LogPosition tail = { logPartPtr.p->logTailFileNo, logPartPtr.p->logTailMbyte};
  Uint64 free_mb = free_log(head, tail, logPartPtr.p->noLogFiles, clogFileSize);
  if (free_mb <= c_free_mb_force_lcp_limit)
  {
    jam();
    force_lcp(signal);
  }

  // The limit for the tail problem is raised by the value returned from
  // get_committed_mbytes() for this log part.
  if (free_mb <=
      (c_free_mb_tail_problem_limit + get_committed_mbytes(logPartPtr.p)))
  {
    jam();
    update_log_problem(signal,
                       logPartPtr,
                       LogPartRecord::P_TAIL_PROBLEM,
                       true);
  }

  // Error insert 5058: close to the end of a log file other than file 0,
  // suspend this file and the first log file and send NDB_TAMPER with 9999
  // to CMVMI after a random delay of 3 to 7 seconds.
  if (ERROR_INSERTED(5058) &&
      (twnlNextMbyte + 3 >= clogFileSize) &&
      logFilePtr.p->fileNo != 0 &&
      logFilePtr.p->nextLogFile != logPartPtr.p->firstLogfile)
  {
    jam();
    srand((int)time(0));
    Uint32 wait = 3 + (rand() % 5);

    suspendFile(signal, logFilePtr, /* forever */ 0);
    suspendFile(signal, logPartPtr.p->firstLogfile, /* forever */ 0);
    signal->theData[0] = 9999;
    sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, wait * 1000, 1);
    CLEAR_ERROR_INSERT_VALUE;
  }

  // Error insert 5059: send NDB_TAMPER with 9999 to CMVMI immediately when
  // the next MByte is number 4 in a log file other than file 0.
  if (ERROR_INSERTED(5059) &&
      twnlNextMbyte == 4 &&
      logFilePtr.p->fileNo != 0)
  {
    signal->theData[0] = 9999;
    sendSignal(CMVMI_REF, GSN_NDB_TAMPER, signal, 1, JBA);
  }

}//Dblqh::writeNextLog()
29943 
29944 bool
validate_filter(Signal * signal)29945 Dblqh::validate_filter(Signal* signal)
29946 {
29947   Uint32 * start = signal->theData + 1;
29948   Uint32 * end = signal->theData + signal->getLength();
29949   if (start == end)
29950   {
29951     infoEvent("No filter specified, not listing...");
29952     if (!ERROR_INSERTED(4002))
29953       return false;
29954     else
29955       return true;
29956   }
29957 
29958   while(start < end)
29959   {
29960     switch(* start){
29961     case 0: // Table
29962     case 1: // API Node
29963     case 3: // TC Node
29964       start += 2;
29965       break;
29966     case 2: // Transid
29967       start += 3;
29968       break;
29969     default:
29970       infoEvent("Invalid filter op: 0x%x pos: %ld",
29971 		* start,
29972 		(long int)(start - (signal->theData + 1)));
29973       return false;
29974     }
29975   }
29976 
29977   if (start != end)
29978   {
29979     infoEvent("Invalid filter, unexpected end");
29980     return false;
29981   }
29982 
29983   return true;
29984 }
29985 
29986 bool
match_and_print(Signal * signal,Ptr<TcConnectionrec> tcRec)29987 Dblqh::match_and_print(Signal* signal, Ptr<TcConnectionrec> tcRec)
29988 {
29989   Uint32 len = signal->getLength();
29990   Uint32* start = signal->theData + 3;
29991   Uint32* end = signal->theData + len;
29992   while (start < end)
29993   {
29994     switch(* start){
29995     case 0:
29996       if (tcRec.p->tableref != * (start + 1))
29997 	return false;
29998       start += 2;
29999       break;
30000     case 1:
30001       if (refToNode(tcRec.p->applRef) != * (start + 1))
30002 	return false;
30003       start += 2;
30004       break;
30005     case 2:
30006       if (tcRec.p->transid[0] != * (start + 1) ||
30007 	  tcRec.p->transid[1] != * (start + 2))
30008 	return false;
30009       start += 3;
30010       break;
30011     case 3:
30012       if (refToNode(tcRec.p->tcBlockref) != * (start + 1))
30013 	return false;
30014       start += 2;
30015       break;
30016     default:
30017       ndbassert(false);
30018       return false;
30019     }
30020   }
30021 
30022   if (start != end)
30023   {
30024     ndbassert(false);
30025     return false;
30026   }
30027 
30028   /**
30029    * Do print
30030    */
30031   Uint32 *temp = signal->theData + 25;
30032   memcpy(temp, signal->theData, 4 * len);
30033 
30034   char state[20];
30035   const char* op = "<Unknown>";
30036   if (tcRec.p->tcScanRec != RNIL)
30037   {
30038     ScanRecordPtr sp;
30039     sp.i = tcRec.p->tcScanRec;
30040     ndbrequire(c_scanRecordPool.getValidPtr(sp));
30041 
30042     if (sp.p->scanLockMode)
30043       op = "SCAN-EX";
30044     else if(sp.p->scanLockHold)
30045       op = "SCAN-SH";
30046     else
30047       op = "SCAN";
30048 
30049     switch(sp.p->scanState){
30050     case ScanRecord::WAIT_NEXT_SCAN:
30051       BaseString::snprintf(state, sizeof(state), "WaitNextScan");
30052       break;
30053     case ScanRecord::IN_QUEUE:
30054       BaseString::snprintf(state, sizeof(state), "InQueue");
30055       break;
30056     case ScanRecord::SCAN_FREE:
30057     case ScanRecord::WAIT_NEXT_SCAN_COPY:
30058     case ScanRecord::COPY_FRAG_HALTED:
30059     case ScanRecord::WAIT_ACC_COPY:
30060     case ScanRecord::WAIT_ACC_SCAN:
30061     case ScanRecord::WAIT_SCAN_NEXTREQ:
30062     case ScanRecord::WAIT_CLOSE_SCAN:
30063     case ScanRecord::WAIT_CLOSE_COPY:
30064     case ScanRecord::WAIT_TUPKEY_COPY:
30065     case ScanRecord::WAIT_LQHKEY_COPY:
30066     case ScanRecord::WAIT_START_QUEUED_SCAN:
30067     case ScanRecord::QUIT_START_QUEUE_SCAN:
30068       BaseString::snprintf(state, sizeof(state), "%u", sp.p->scanState);
30069       break;
30070     }
30071   }
30072   else
30073   {
30074     switch(tcRec.p->operation){
30075     case ZREAD:
30076       if (tcRec.p->lockType)
30077 	op = "READ-EX";
30078       else if(!tcRec.p->dirtyOp)
30079 	op = "READ-SH";
30080       else
30081 	op = "READ";
30082       break;
30083     case ZINSERT: op = "INSERT"; break;
30084     case ZUPDATE: op = "UPDATE"; break;
30085     case ZDELETE: op = "DELETE"; break;
30086     case ZWRITE: op = "WRITE"; break;
30087     case ZUNLOCK: op = "UNLOCK"; break;
30088     }
30089 
30090     switch(tcRec.p->transactionState){
30091     case TcConnectionrec::IDLE:
30092     case TcConnectionrec::WAIT_ACC:
30093       BaseString::snprintf(state, sizeof(state), "In lock queue");
30094       break;
30095     case TcConnectionrec::WAIT_TUPKEYINFO:
30096     case TcConnectionrec::WAIT_ATTR:
30097       BaseString::snprintf(state, sizeof(state), "WaitData");
30098       break;
30099     case TcConnectionrec::WAIT_TUP:
30100       BaseString::snprintf(state, sizeof(state), "Running");
30101       break;
30102     case TcConnectionrec::WAIT_TUP_COMMIT:
30103       BaseString::snprintf(state, sizeof(state), "Committing");
30104       break;
30105     case TcConnectionrec::PREPARED:
30106       BaseString::snprintf(state, sizeof(state), "Prepared");
30107       break;
30108     case TcConnectionrec::COMMITTED:
30109       BaseString::snprintf(state, sizeof(state), "Committed");
30110       break;
30111     case TcConnectionrec::LOG_QUEUED:
30112     case TcConnectionrec::LOG_COMMIT_WRITTEN_WAIT_SIGNAL:
30113     case TcConnectionrec::LOG_COMMIT_QUEUED_WAIT_SIGNAL:
30114     case TcConnectionrec::LOG_COMMIT_QUEUED:
30115     case TcConnectionrec::COMMIT_QUEUED:
30116     case TcConnectionrec::WAIT_ACC_ABORT:
30117     case TcConnectionrec::ABORT_QUEUED:
30118     case TcConnectionrec::WAIT_AI_AFTER_ABORT:
30119     case TcConnectionrec::LOG_ABORT_QUEUED:
30120     case TcConnectionrec::WAIT_TUP_TO_ABORT:
30121     case TcConnectionrec::WAIT_SCAN_AI:
30122     case TcConnectionrec::SCAN_STATE_USED:
30123     case TcConnectionrec::SCAN_TUPKEY:
30124     case TcConnectionrec::COPY_TUPKEY:
30125     case TcConnectionrec::TC_NOT_CONNECTED:
30126     case TcConnectionrec::PREPARED_RECEIVED_COMMIT:
30127     case TcConnectionrec::LOG_COMMIT_WRITTEN:
30128       BaseString::snprintf(state, sizeof(state), "%u",
30129 			   tcRec.p->transactionState);
30130     }
30131   }
30132 
30133   char buf[100];
30134   BaseString::snprintf(buf, sizeof(buf),
30135 		       "OP[%u]: Tab: %d frag: %d TC: %u API: %d(0x%x)"
30136 		       "transid: H'%.8x H'%.8x op: %s state: %s",
30137 		       tcRec.i,
30138 		       tcRec.p->tableref,
30139 		       tcRec.p->fragmentid,
30140 		       refToNode(tcRec.p->tcBlockref),
30141 		       refToNode(tcRec.p->applRef),
30142 		       refToBlock(tcRec.p->applRef),
30143 		       tcRec.p->transid[0], tcRec.p->transid[1],
30144 		       op,
30145 		       state);
30146 
30147   if (!ERROR_INSERTED(4002))
30148     infoEvent("%s", buf);
30149   else
30150     ndbout_c("%s", buf);
30151 
30152   memcpy(signal->theData, temp, 4*len);
30153   return true;
30154 }
30155 
30156 void
execDUMP_STATE_ORD(Signal * signal)30157 Dblqh::execDUMP_STATE_ORD(Signal* signal)
30158 {
30159   jamEntry();
30160   DumpStateOrd * const dumpState = (DumpStateOrd *)&signal->theData[0];
30161   Uint32 arg= dumpState->args[0];
30162 
30163 #if defined(VM_TRACE) || defined(ERROR_INSERT)
30164   if (signal->theData[0] == DumpStateOrd::LqhSetTransientPoolMaxSize)
30165   {
30166     jam();
30167     if (signal->getLength() < 3)
30168       return;
30169     const Uint32 pool_index = signal->theData[1];
30170     const Uint32 new_size = signal->theData[2];
30171     if (pool_index == DBLQH_OPERATION_RECORD_TRANSIENT_POOL_INDEX)
30172     {
30173       SET_ERROR_INSERT_VALUE(5031);
30174     }
30175     if (pool_index >= c_transient_pool_count)
30176       return;
30177     c_transient_pools[pool_index]->setMaxSize(new_size);
30178     return;
30179   }
30180   if (signal->theData[0] == DumpStateOrd::LqhResetTransientPoolMaxSize)
30181   {
30182     jam();
30183     if(signal->getLength() < 2)
30184       return;
30185     const Uint32 pool_index = signal->theData[1];
30186     if (pool_index == DBLQH_OPERATION_RECORD_TRANSIENT_POOL_INDEX)
30187     {
30188       CLEAR_ERROR_INSERT_VALUE;
30189     }
30190     if (pool_index >= c_transient_pool_count)
30191       return;
30192     c_transient_pools[pool_index]->resetMaxSize();
30193     return;
30194   }
30195 #endif
30196 
30197   if(dumpState->args[0] == DumpStateOrd::CommitAckMarkersSize){
30198     infoEvent("LQH: m_commitAckMarkerPool: %d free size: %d",
30199 	      m_commitAckMarkerPool.getNoOfFree(),
30200 	      m_commitAckMarkerPool.getSize());
30201   }
30202   if(dumpState->args[0] == DumpStateOrd::CommitAckMarkersDump){
30203     infoEvent("LQH: m_commitAckMarkerPool: %d free size: %d",
30204 	      m_commitAckMarkerPool.getNoOfFree(),
30205 	      m_commitAckMarkerPool.getSize());
30206 #ifdef ERROR_INSERT
30207     CommitAckMarkerIterator iter;
30208     for(m_commitAckMarkerHash.first(iter); iter.curr.i != RNIL;
30209 	m_commitAckMarkerHash.next(iter)){
30210       infoEvent("CommitAckMarker: i = %d (H'%.8x, H'%.8x)"
30211 		" ApiRef: 0x%x apiOprec: 0x%x TcRef: %x, ref_count: %u",
30212 		iter.curr.i,
30213 		iter.curr.p->transid1,
30214 		iter.curr.p->transid2,
30215 		iter.curr.p->apiRef,
30216 		iter.curr.p->apiOprec,
30217 		iter.curr.p->tcRef,
30218                 iter.curr.p->reference_count);
30219     }
30220 #endif
30221   }
30222 
30223   // Dump info about number of log pages
30224   if(dumpState->args[0] == DumpStateOrd::LqhDumpNoLogPages){
30225     infoEvent("LQH: Log pages : %d Free: %d",
30226 	      clogPageFileSize,
30227 	      cnoOfLogPages);
30228   }
30229 
30230   // Dump all defined tables that LQH knowns about
30231   if(dumpState->args[0] == DumpStateOrd::LqhDumpAllDefinedTabs){
30232     for(Uint32 i = 0; i<ctabrecFileSize; i++){
30233       TablerecPtr tabPtr;
30234       tabPtr.i = i;
30235       ptrAss(tabPtr, tablerec);
30236       if(tabPtr.p->tableStatus != Tablerec::NOT_DEFINED){
30237 	infoEvent("Table %d Status: %d Usage: [ r: %u w: %u ]",
30238 		  i, tabPtr.p->tableStatus,
30239                   tabPtr.p->usageCountR, tabPtr.p->usageCountW);
30240 
30241 	for (Uint32 j = 0; j<NDB_ARRAY_SIZE(tabPtr.p->fragrec); j++)
30242 	{
30243 	  FragrecordPtr fragPtr;
30244 	  if ((fragPtr.i = tabPtr.p->fragrec[j]) != RNIL)
30245 	  {
30246 	    c_fragment_pool.getPtr(fragPtr);
30247 	    infoEvent("  frag: %d distKey: %u",
30248 		      tabPtr.p->fragid[j],
30249 		      fragPtr.p->fragDistributionKey);
30250 	  }
30251 	}
30252       }
30253     }
30254     return;
30255   }
30256 
30257   if (dumpState->args[0] == DumpStateOrd::LqhDumpOneCopyTcRec)
30258   {
30259     TcConnectionrecPtr tcRec;
30260     tcRec.i = signal->theData[1];
30261     if (!tcConnect_pool.getValidPtr(tcRec))
30262     {
30263       jam();
30264       return;
30265     }
30266     g_eventLogger->info("Copy TC record: tab(%u), error: %u,"
30267                         " copyCountWords: %u",
30268                         tcRec.p->tableref,
30269                         tcRec.p->errorCode,
30270                         tcRec.p->copyCountWords);
30271     return;
30272   }
30273   // Dump all ScanRecords
30274   if ((dumpState->args[0] == DumpStateOrd::LqhDumpAllScanRec) ||
30275       (dumpState->args[0] == DumpStateOrd::LqhDumpAllActiveScanRec))
30276   {
30277     Uint32 recordNo = 0;
30278     if (signal->length() == 1)
30279       infoEvent("LQH: Dump all ScanRecords");
30280     else if (signal->length() == 2)
30281       recordNo = dumpState->args[1];
30282     else
30283       return;
30284 
30285     ScanRecordPtr loc_scanptr;
30286     if (getNextScanRec(recordNo, loc_scanptr, 10) == 1)
30287     {
30288       dumpState->args[0] = DumpStateOrd::LqhDumpOneScanRec;
30289       dumpState->args[1] = loc_scanptr.i;
30290       execDUMP_STATE_ORD(signal);
30291     }
30292 
30293     if (recordNo < RNIL)
30294     {
30295       dumpState->args[0] = DumpStateOrd::LqhDumpAllScanRec;
30296       dumpState->args[1] = recordNo;
30297       sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 2, JBB);
30298     }
30299     return;
30300   }
30301 
30302   if(dumpState->args[0] == DumpStateOrd::LqhDumpOneScanRec)
30303   {
30304     Uint32 recordNo = RNIL;
30305     if (signal->length() == 2)
30306     {
30307       jam();
30308       recordNo = dumpState->args[1];
30309     }
30310     else
30311     {
30312       jam();
30313       return;
30314     }
30315 
30316     ScanRecordPtr sp;
30317     sp.i = recordNo;
30318     if (!c_scanRecordPool.getValidPtr(sp))
30319     {
30320       jam();
30321       return;
30322     }
30323     jam();
30324 
30325     g_eventLogger->info("Dblqh::ScanRecord[%d]: state=%d, type=%d, "
30326 	      "complStatus=%d, scanNodeId=%d",
30327 	      sp.i,
30328 	      sp.p->scanState,
30329 	      sp.p->scanType,
30330 	      sp.p->scanCompletedStatus,
30331 	      sp.p->scanNodeId);
30332     g_eventLogger->info(" apiBref=0x%x, scanAccPtr=%d",
30333 	      sp.p->scanApiBlockref,
30334 	      sp.p->scanAccPtr);
30335     g_eventLogger->info(" copyptr=%d, ailen=%d, complOps=%d, concurrOps=%d",
30336 	      sp.p->copyPtr,
30337 	      sp.p->scanAiLength,
30338 	      sp.p->m_curr_batch_size_rows,
30339 	      sp.p->m_max_batch_size_rows);
30340     g_eventLogger->info(" errCnt=%d, schV=%d",
30341 	      sp.p->scanErrorCounter,
30342 	      sp.p->scanSchemaVersion);
30343     g_eventLogger->info(" stpid=%d, flag=%d, lhold=%d, lmode=%d, num=%d",
30344 	      sp.p->scanStoredProcId,
30345 	      sp.p->scanFlag,
30346 	      sp.p->scanLockHold,
30347 	      sp.p->scanLockMode,
30348 	      sp.p->scanNumber);
30349     g_eventLogger->info(" relCount=%d, TCwait=%d, TCRec=%d, KIflag=%d",
30350 	      sp.p->scanReleaseCounter,
30351 	      sp.p->scanTcWaiting,
30352 	      sp.p->scanTcrec,
30353 	      sp.p->scanKeyinfoFlag);
30354     g_eventLogger->info(" LcpScan=%d  RowId(%u:%u)",
30355               sp.p->lcpScan,
30356               sp.p->m_row_id.m_page_no,
30357               sp.p->m_row_id.m_page_idx);
30358     g_eventLogger->info("scan_lastSeen=%d, scan_check_lcp_stop=%u",
30359               sp.p->scan_lastSeen,
30360               sp.p->scan_check_lcp_stop);
30361     return;
30362   }
30363   if(dumpState->args[0] == DumpStateOrd::LqhDumpLcpState){
30364 
30365     infoEvent("== LQH LCP STATE ==");
30366     infoEvent(" clcpCompletedState=%d, c_lcpId=%d, cnoOfFragsCheckpointed=%d",
30367 	      clcpCompletedState,
30368 	      c_lcpId,
30369 	      cnoOfFragsCheckpointed);
30370 
30371     LcpRecordPtr TlcpPtr;
30372     // Print information about the current local checkpoint
30373     TlcpPtr.i = 0;
30374     ptrAss(TlcpPtr, lcpRecord);
30375     infoEvent(" lcpPrepareState=%d lcpRunState=%d lastFragmentFlag=%d",
30376 	      TlcpPtr.p->lcpPrepareState,
30377               TlcpPtr.p->lcpRunState,
30378               TlcpPtr.p->lastFragmentFlag);
30379 
30380     infoEvent("currentPrepareFragment.fragPtrI=%d",
30381 	      TlcpPtr.p->currentPrepareFragment.fragPtrI);
30382     infoEvent("currentPrepareFragment.lcpFragOrd.tableId=%d",
30383 	      TlcpPtr.p->currentPrepareFragment.lcpFragOrd.tableId);
30384     infoEvent("currentPrepareFragment.lcpFragOrd.fragmentId=%d",
30385 	      TlcpPtr.p->currentPrepareFragment.lcpFragOrd.fragmentId);
30386 
30387     infoEvent("currentRunFragment.fragPtrI=%d",
30388 	      TlcpPtr.p->currentRunFragment.fragPtrI);
30389     infoEvent("currentRunFragment.lcpFragOrd.tableId=%d",
30390 	      TlcpPtr.p->currentRunFragment.lcpFragOrd.tableId);
30391     infoEvent("currentRunFragment.lcpFragOrd.fragmentId=%d",
30392 	      TlcpPtr.p->currentRunFragment.lcpFragOrd.fragmentId);
30393 
30394     if ((signal->length() == 2) &&
30395         (dumpState->args[1] == 0))
30396     {
30397       /* Dump reserved LCP scan rec */
30398       /* As there's only one, we'll do a tight loop here */
30399       infoEvent(" dumping reserved scan records");
30400       for (Uint32 rec=0; rec < 3; rec++)
30401       {
30402         ScanRecordPtr sp;
30403         sp.i = rec;
30404         ndbrequire(c_scanRecordPool.getValidPtr(sp));
30405         ndbrequire(sp.p->m_reserved);
30406 
30407         if (sp.p->lcpScan)
30408         {
30409           dumpState->args[0] = DumpStateOrd::LqhDumpOneScanRec;
30410           dumpState->args[1] = rec;
30411           execDUMP_STATE_ORD(signal);
30412         }
30413       }
30414     }
30415     return;
30416   }
30417   if (dumpState->args[0] == DumpStateOrd::LQHLogFileInitStatus){
30418      reportStatus(signal);
30419      return;
30420   }
30421 
30422 #ifdef ERROR_INSERT
30423 #ifdef NDB_DEBUG_FULL
30424   if(dumpState->args[0] == DumpStateOrd::LCPContinue){
30425     switch(cerrorInsert){
30426     case 5904:
30427       CLEAR_ERROR_INSERT_VALUE;
30428       g_trace_lcp.restore(*globalData.getBlock(BACKUP), signal);
30429       return;
30430     default:
30431       return;
30432     }
30433   }
30434 #endif
30435 #endif
30436 
30437   if(arg == DumpStateOrd::LqhSystemError ||
30438      arg == DumpStateOrd::LqhFailedHandlingGCP_SAVEREQ)
30439   {
30440     jam();
30441 
30442     // logPartRecord is initialised in start phase 1
30443     if ((getNodeState().startLevel < NodeState::SL_STARTING) ||
30444         ((getNodeState().startLevel == NodeState::SL_STARTING) && (getNodeState().starting.startPhase < 2)))
30445     {
30446       jam();
30447       return ;
30448     }
30449 
30450     Uint32 i;
30451     void * logPartPtr = 0;
30452     (void)logPartPtr;
30453     GcpRecordPtr gcp; gcp.i = RNIL;
30454     for(i = 0; i < clogPartFileSize; i++)
30455     {
30456       Ptr<LogPartRecord> lp;
30457       lp.i = i;
30458       ptrCheckGuard(lp, clogPartFileSize, logPartRecord);
30459       ndbout_c("LP %d blockInstance: %d partNo: %d state: %d WW_Gci: %d gcprec: %d flq: %u %u currfile: %d tailFileNo: %d logTailMbyte: %d cnoOfLogPages: %u problems: 0x%x",
30460                i,
30461                instance(),
30462                lp.p->logPartNo,
30463 	       lp.p->logPartState,
30464 	       lp.p->waitWriteGciLog,
30465 	       lp.p->gcprec,
30466 	       lp.p->m_log_prepare_queue.firstElement,
30467 	       lp.p->m_log_complete_queue.firstElement,
30468 	       lp.p->currentLogfile,
30469 	       lp.p->logTailFileNo,
30470 	       lp.p->logTailMbyte,
30471                cnoOfLogPages,
30472                lp.p->m_log_problems);
30473 
30474       if(gcp.i == RNIL && lp.p->gcprec != RNIL)
30475 	gcp.i = lp.p->gcprec;
30476 
30477       LogFileRecordPtr logFilePtr;
30478       Uint32 first= logFilePtr.i= lp.p->firstLogfile;
30479       do
30480       {
30481 	ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
30482 	ndbout_c("  file %d(%d)  FileChangeState: %d  logFileStatus: %d  currentMbyte: %d  currentFilepage %d",
30483 		 logFilePtr.p->fileNo,
30484 		 logFilePtr.i,
30485 		 logFilePtr.p->fileChangeState,
30486 		 logFilePtr.p->logFileStatus,
30487 		 logFilePtr.p->currentMbyte,
30488 		 logFilePtr.p->currentFilepage);
30489 	logFilePtr.i = logFilePtr.p->nextLogFile;
30490       } while(logFilePtr.i != first);
30491     }
30492 
30493     if(gcp.i != RNIL)
30494     {
30495       ptrCheckGuard(gcp, cgcprecFileSize, gcpRecord);
30496       for(i = 0; i<4; i++)
30497       {
30498 	ndbout_c("  GCP %d file: %d state: %d sync: %d page: %d word: %d",
30499 		 i, gcp.p->gcpFilePtr[i], gcp.p->gcpLogPartState[i],
30500 		 gcp.p->gcpSyncReady[i],
30501 		 gcp.p->gcpPageNo[i],
30502 		 gcp.p->gcpWordNo[i]);
30503       }
30504     }
30505 
30506     if(arg== DumpStateOrd::LqhFailedHandlingGCP_SAVEREQ)
30507     {
30508       if (ERROR_INSERTED(5085))
30509       {
30510         g_eventLogger->info("LQH instance %u ignoring DUMP 2305 (GCP_STOP kill)",
30511                             instance());
30512         return;
30513       }
30514       CRASH_INSERTION(5087);
30515       progError(__LINE__, NDBD_EXIT_SYSTEM_ERROR,
30516 		"Please report this as a bug. "
30517 		"Provide as much info as possible, expecially all the "
30518 		"ndb_*_out.log files, Thanks. "
30519 		"Shutting down node due to failed handling of GCP_SAVEREQ");
30520 
30521     }
30522   }
30523 
30524   if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && (signal->getLength() >= 2))
30525   {
30526     c_error_insert_table_id = dumpState->args[1];
30527     if (signal->getLength() == 2)
30528     {
30529       SET_ERROR_INSERT_VALUE(5042);
30530     }
30531     else
30532     {
30533       SET_ERROR_INSERT_VALUE(dumpState->args[2]);
30534     }
30535   }
30536 
30537   if(arg == DumpStateOrd::LqhDumpAllTcRec)
30538   {
30539     Uint32 bucketLen[TRANSID_HASH_SIZE];
30540     for(Uint32 i = 0; i<TRANSID_HASH_SIZE; i++)
30541     {
30542       TcConnectionrecPtr tcRec;
30543       tcRec.i = ctransidHash[i];
30544       bucketLen[i] = 0;
30545       while(tcRec.i != RNIL)
30546       {
30547         ndbrequire(tcConnect_pool.getValidPtr(tcRec));
30548 	ndbout << "TcConnectionrec " << tcRec.i;
30549 	signal->theData[0] = DumpStateOrd::LqhDumpOneTcRec;
30550 	signal->theData[1] = tcRec.i;
30551 	execDUMP_STATE_ORD(signal);
30552 	tcRec.i = tcRec.p->nextHashRec;
30553         bucketLen[i]++;
30554       }
30555     }
30556     ndbout << "LQH transid hash bucket lengths : " << endl;
30557     for (Uint32 i = 0; i < TRANSID_HASH_SIZE; i++)
30558     {
30559       if (bucketLen[i] > 0)
30560       {
30561         ndbout << " bucket " << i << " len " << bucketLen[i] << endl;
30562       }
30563     }
30564     ndbout << "Done." << endl;
30565   }
30566 
30567   if (arg == DumpStateOrd::LqhDumpOneTcRec || arg == 2308)
30568   {
30569     TcConnectionrecPtr tcRec;
30570     tcRec.i = signal->theData[1];
30571     if (!tcConnect_pool.getValidPtr(tcRec))
30572     {
30573       jam();
30574       return;
30575     }
30576 
30577     ndbout << " transactionState = " << tcRec.p->transactionState<<endl;
30578     ndbout << " operation = " << tcRec.p->operation<<endl;
30579     ndbout << " tcNodeFailrec = " << tcRec.p->tcNodeFailrec
30580 	   << " seqNoReplica = " << tcRec.p->seqNoReplica
30581 	   << endl;
30582     ndbout << " replicaType = " << tcRec.p->replicaType
30583 	   << " reclenAiLqhkey = " << tcRec.p->reclenAiLqhkey
30584 	   << " opExec = " << tcRec.p->opExec
30585 	   << endl;
30586     ndbout << " opSimple = " << tcRec.p->opSimple
30587 	   << " nextSeqNoReplica = " << tcRec.p->nextSeqNoReplica
30588 	   << " lockType = " << tcRec.p->lockType
30589 	   << endl;
30590     ndbout << " lastReplicaNo = " << tcRec.p->lastReplicaNo
30591 	   << " indTakeOver = " << tcRec.p->indTakeOver
30592 	   << " dirtyOp = " << tcRec.p->dirtyOp
30593 	   << endl;
30594     ndbout << " activeCreat = " << tcRec.p->activeCreat
30595 	   << " tcBlockref = " << hex << tcRec.p->tcBlockref
30596 	   << " primKeyLen = " << tcRec.p->primKeyLen
30597 	   << " nrcopyflag = " << LqhKeyReq::getNrCopyFlag(tcRec.p->reqinfo)
30598 	   << endl;
30599     ndbout << " nextReplica = " << tcRec.p->nextReplica
30600 	   << " tcBlockref = " << hex << tcRec.p->tcBlockref
30601 	   << " reqBlockref = " << hex << tcRec.p->reqBlockref
30602 	   << " primKeyLen = " << tcRec.p->primKeyLen
30603 	   << endl;
30604     ndbout << " logStopPageNo = " << tcRec.p->logStopPageNo
30605 	   << " logStartPageNo = " << tcRec.p->logStartPageNo
30606 	   << " logStartPageIndex = " << tcRec.p->logStartPageIndex
30607 	   << endl;
30608     ndbout << " errorCode = " << tcRec.p->errorCode
30609 	   << " clientBlockref = " << hex << tcRec.p->clientBlockref
30610 	   << " applRef = " << hex << tcRec.p->applRef
30611 	   << " totSendlenAi = " << tcRec.p->totSendlenAi
30612 	   << endl;
30613     ndbout << " totReclenAi = " << tcRec.p->totReclenAi
30614 	   << " tcScanRec = " << tcRec.p->tcScanRec
30615 	   << " tcScanInfo = " << tcRec.p->tcScanInfo
30616 	   << " tcOprec = " << hex << tcRec.p->tcOprec
30617 	   << endl;
30618     ndbout << " tableref = " << tcRec.p->tableref
30619 	   << " schemaVersion = " << tcRec.p->schemaVersion
30620 	   << endl;
30621     ndbout << " reqinfo = " << tcRec.p->reqinfo
30622 	   << " reqRef = " << tcRec.p->reqRef
30623 	   << " readlenAi = " << tcRec.p->readlenAi
30624 	   << endl;
30625     ndbout << " prevLogTcrec = " << tcRec.p->prevLogTcrec
30626 	   << " prevHashRec = " << tcRec.p->prevHashRec
30627 	   << " nodeAfterNext0 = " << tcRec.p->nodeAfterNext[0]
30628 	   << " nodeAfterNext1 = " << tcRec.p->nodeAfterNext[1]
30629 	   << endl;
30630     ndbout << " nextTcConnectrec = " << tcRec.p->nextTcConnectrec
30631 	   << " nextTcLogQueue = " << tcRec.p->nextTcLogQueue
30632 	   << " prevTcLogQueue = " << tcRec.p->prevTcLogQueue
30633 	   << " nextLogTcrec = " << tcRec.p->nextLogTcrec
30634 	   << endl;
30635     ndbout << " nextHashRec = " << tcRec.p->nextHashRec
30636 	   << " logWriteState = " << tcRec.p->logWriteState
30637 	   << " logStartFileNo = " << tcRec.p->logStartFileNo
30638 	   << endl;
30639     ndbout << " gci_hi = " << tcRec.p->gci_hi
30640            << " gci_lo = " << tcRec.p->gci_lo
30641 	   << " fragmentptr = " << tcRec.p->fragmentptr
30642 	   << " fragmentid = " << tcRec.p->fragmentid
30643 	   << endl;
30644     ndbout << " hashValue = " << tcRec.p->hashValue
30645            << " currTupAiLen = " << tcRec.p->currTupAiLen
30646 	   << " currReclenAi = " << tcRec.p->currReclenAi
30647 	   << endl;
30648     ndbout << " tcTimer = " << tcRec.p->tcTimer
30649 	   << " clientConnectrec = " << tcRec.p->clientConnectrec
30650 	   << " applOprec = " << hex << tcRec.p->applOprec
30651 	   << " abortState = " << tcRec.p->abortState
30652 	   << endl;
30653     ndbout << " transid0 = " << hex << tcRec.p->transid[0]
30654 	   << " transid1 = " << hex << tcRec.p->transid[1]
30655 	   << " key[0] = " << getKeyInfoWordOrZero(tcRec.p, 0)
30656 	   << " key[1] = " << getKeyInfoWordOrZero(tcRec.p, 1)
30657 	   << endl;
30658     ndbout << " key[2] = " << getKeyInfoWordOrZero(tcRec.p, 2)
30659 	   << " key[3] = " << getKeyInfoWordOrZero(tcRec.p, 3)
30660 	   << " m_nr_delete.m_cnt = " << tcRec.p->m_nr_delete.m_cnt
30661 	   << endl;
30662     switch (tcRec.p->transactionState) {
30663 
30664     case TcConnectionrec::SCAN_STATE_USED:
30665       if (tcRec.p->tcScanRec != RNIL)
30666       {
30667 	ScanRecordPtr TscanPtr;
30668         TscanPtr.i = tcRec.p->tcScanRec;
30669 	ndbrequire(c_scanRecordPool.getValidPtr(TscanPtr));
30670 	ndbout << " scanState = " << TscanPtr.p->scanState << endl;
30671 	//TscanPtr.p->scanLocalref[2];
30672 	ndbout << " copyPtr="<<TscanPtr.p->copyPtr
30673 	       << " scanAccPtr="<<TscanPtr.p->scanAccPtr
30674 	       << " scanAiLength="<<TscanPtr.p->scanAiLength
30675 	       << endl;
30676 	ndbout << " m_curr_batch_size_rows="<<
30677 	  TscanPtr.p->m_curr_batch_size_rows
30678 	       << " m_max_batch_size_rows="<<
30679 	  TscanPtr.p->m_max_batch_size_rows
30680 	       << " scanErrorCounter="<<TscanPtr.p->scanErrorCounter
30681 	       << endl;
30682 	ndbout << " scanSchemaVersion="<<TscanPtr.p->scanSchemaVersion
30683 	       << "  scanStoredProcId="<<TscanPtr.p->scanStoredProcId
30684 	       << "  scanTcrec="<<TscanPtr.p->scanTcrec
30685 	       << endl;
30686 	ndbout << "  scanType="<<TscanPtr.p->scanType
30687 	       << "  scanApiBlockref="<<TscanPtr.p->scanApiBlockref
30688 	       << "  scanNodeId="<<TscanPtr.p->scanNodeId
30689 	       << "  scanCompletedStatus="<<TscanPtr.p->scanCompletedStatus
30690 	       << endl;
30691 	ndbout << "  scanFlag="<<TscanPtr.p->scanFlag
30692 	       << "  scanLockHold="<<TscanPtr.p->scanLockHold
30693 	       << "  scanLockMode="<<TscanPtr.p->scanLockMode
30694 	       << "  scanNumber="<<TscanPtr.p->scanNumber
30695 	       << endl;
30696 	ndbout << "  scanReleaseCounter="<<TscanPtr.p->scanReleaseCounter
30697 	       << "  scanTcWaiting="<<TscanPtr.p->scanTcWaiting
30698 	       << "  scanKeyinfoFlag="<<TscanPtr.p->scanKeyinfoFlag
30699 	       << endl;
30700       } else{
30701 	ndbout << "No connected scan record found" << endl;
30702       }
30703       break;
30704     default:
30705       break;
30706     }
30707     ndbrequire(arg != 2308);
30708   }
30709 
30710 #ifdef NDBD_TRACENR
30711   if (arg == 5712 || arg == 5713)
30712   {
30713     if (arg == 5712)
30714     {
30715       traceopout = &ndbout;
30716     }
30717     else if (arg == 5713)
30718     {
30719       traceopout = tracenrout;
30720     }
30721     SET_ERROR_INSERT_VALUE(arg);
30722   }
30723 #endif
30724 
30725   if (arg == 2350)
30726   {
30727     jam();
30728     Uint32 len = signal->getLength() - 1;
30729     if (len + 3 > 25)
30730     {
30731       jam();
30732       infoEvent("Too long filter");
30733       return;
30734     }
30735     if (validate_filter(signal))
30736     {
30737       jam();
30738       memmove(signal->theData + 3, signal->theData + 1, 4 * len);
30739       signal->theData[0] = 2351;
30740       signal->theData[1] = 0;    // Bucket
30741       signal->theData[2] = RNIL; // Record
30742       sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, len + 3, JBB);
30743 
30744       infoEvent("Starting dump of operations");
30745     }
30746     return;
30747   }
30748 
30749   if (arg == 2351)
30750   {
30751     jam();
30752     Uint32 bucket = signal->theData[1];
30753     Uint32 record = signal->theData[2];
30754     Uint32 len = signal->getLength();
30755     TcConnectionrecPtr tcRec;
30756     if (record != RNIL)
30757     {
30758       jam();
30759       /**
30760        * Check that record is still in use...
30761        */
30762       tcRec.i = record;
30763       if (!tcConnect_pool.getValidPtr(tcRec))
30764       {
30765         jam();
30766         record = RNIL;
30767       }
30768       else
30769       {
30770         Uint32 hashIndex = (tcRec.p->transid[0] ^ tcRec.p->tcOprec) &
30771                             (TRANSID_HASH_SIZE - 1);
30772         if (hashIndex != bucket)
30773         {
30774 	  jam();
30775 	  record = RNIL;
30776         }
30777         else
30778         {
30779 	  jam();
30780 	  if (tcRec.p->nextHashRec == RNIL &&
30781 	      tcRec.p->prevHashRec == RNIL &&
30782 	      ctransidHash[hashIndex] != record)
30783 	  {
30784 	    jam();
30785 	    record = RNIL;
30786 	  }
30787         }
30788       }
30789 
30790       if (record == RNIL)
30791       {
30792 	jam();
30793 	signal->theData[2] = RNIL;
30794 	sendSignal(reference(), GSN_DUMP_STATE_ORD, signal,
30795 		   signal->getLength(), JBB);
30796 	return;
30797       }
30798     }
30799     else if ((record = ctransidHash[bucket]) == RNIL)
30800     {
30801       jam();
30802       bucket++;
30803       if (bucket < TRANSID_HASH_SIZE)
30804       {
30805 	jam();
30806 	signal->theData[1] = bucket;
30807 	signal->theData[2] = RNIL;
30808 	sendSignal(reference(), GSN_DUMP_STATE_ORD, signal,
30809 		   signal->getLength(), JBB);
30810       }
30811       else
30812       {
30813 	jam();
30814         infoEvent("End of operation dump");
30815         if (ERROR_INSERTED(4002))
30816         {
30817           ndbabort();
30818         }
30819       }
30820 
30821       return;
30822     }
30823     else
30824     {
30825       jam();
30826       tcRec.i = record;
30827       ndbrequire(tcConnect_pool.getValidPtr(tcRec));
30828     }
30829 
30830     for (Uint32 i = 0; i<32; i++)
30831     {
30832       jam();
30833       bool print = match_and_print(signal, tcRec);
30834 
30835       tcRec.i = tcRec.p->nextHashRec;
30836       if (tcRec.i == RNIL || print)
30837       {
30838 	jam();
30839 	break;
30840       }
30841       ndbrequire(tcConnect_pool.getValidPtr(tcRec));
30842     }
30843 
30844     if (tcRec.i == RNIL)
30845     {
30846       jam();
30847       bucket++;
30848       if (bucket < TRANSID_HASH_SIZE)
30849       {
30850 	jam();
30851 	signal->theData[1] = bucket;
30852 	signal->theData[2] = RNIL;
30853 	sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, len, JBB);
30854       }
30855       else
30856       {
30857 	jam();
30858         infoEvent("End of operation dump");
30859         if (ERROR_INSERTED(4002))
30860         {
30861           ndbabort();
30862         }
30863       }
30864 
30865       return;
30866     }
30867     else
30868     {
30869       jam();
30870       signal->theData[2] = tcRec.i;
30871       sendSignalWithDelay(reference(), GSN_DUMP_STATE_ORD, signal, 200, len);
30872       return;
30873     }
30874   }
30875 
30876   if (arg == 2352 && signal->getLength() == 2)
30877   {
30878     jam();
30879     Uint32 opNo = signal->theData[1];
30880     TcConnectionrecPtr tcRec;
30881     tcRec.i = opNo;
30882     if (tcConnect_pool.getValidPtr(tcRec))
30883     {
30884       jam();
30885       BaseString key;
30886       if (tcRec.p->keyInfoIVal != RNIL)
30887       {
30888         jam();
30889         SectionReader keyInfoReader(tcRec.p->keyInfoIVal,
30890                                     g_sectionSegmentPool);
30891 
30892         Uint32 keyWord;
30893         while (keyInfoReader.getWord(&keyWord))
30894           key.appfmt("0x%x ", keyWord);
30895       }
30896 
30897       char buf[100];
30898       BaseString::snprintf(buf, sizeof(buf),
30899 			   "OP[%u]: transid: 0x%x 0x%x key: %s",
30900 			   tcRec.i,
30901 			   tcRec.p->transid[0], tcRec.p->transid[1], key.c_str());
30902       infoEvent("%s", buf);
30903     }
30904   }
30905 
30906   if (arg == DumpStateOrd::SchemaResourceSnapshot)
30907   {
30908     RSS_AP_SNAPSHOT_SAVE(c_fragment_pool);
30909     return;
30910   }
30911 
30912   if (arg == DumpStateOrd::SchemaResourceCheckLeak)
30913   {
30914     RSS_AP_SNAPSHOT_CHECK(c_fragment_pool);
30915     return;
30916   }
30917 
30918   if (arg == 4002)
30919   {
30920     bool ops = false;
30921     for (Uint32 i = 0; i<TRANSID_HASH_SIZE; i++)
30922     {
30923       if (ctransidHash[i] != RNIL)
30924       {
30925         jam();
30926         ops = true;
30927         break;
30928       }
30929     }
30930 
30931 #ifdef ERROR_INSERT
30932     bool markers = m_commitAckMarkerPool.getNoOfFree() !=
30933       m_commitAckMarkerPool.getSize();
30934     if (unlikely(ops || markers))
30935     {
30936 
30937       if (markers)
30938       {
30939         ndbout_c("LQH: m_commitAckMarkerPool: %d free size: %d",
30940                  m_commitAckMarkerPool.getNoOfFree(),
30941                  m_commitAckMarkerPool.getSize());
30942 
30943         CommitAckMarkerIterator iter;
30944         for(m_commitAckMarkerHash.first(iter); iter.curr.i != RNIL;
30945             m_commitAckMarkerHash.next(iter))
30946         {
30947           ndbout_c("CommitAckMarker: i = %d (H'%.8x, H'%.8x)"
30948                    " ApiRef: 0x%x apiOprec: 0x%x TcRef: %x ref_count: %u",
30949                    iter.curr.i,
30950                    iter.curr.p->transid1,
30951                    iter.curr.p->transid2,
30952                    iter.curr.p->apiRef,
30953                    iter.curr.p->apiOprec,
30954                    iter.curr.p->tcRef,
30955                    iter.curr.p->reference_count);
30956         }
30957       }
30958       SET_ERROR_INSERT_VALUE(4002);
30959       signal->theData[0] = 2350;
30960       EXECUTE_DIRECT(DBLQH, GSN_DUMP_STATE_ORD, signal, 1);
30961     }
30962 #endif
30963   }
30964 
30965   if(arg == 2399)
30966   {
30967     jam();
30968 
30969     if (cstartRecReq < SRR_REDO_COMPLETE)
30970     {
30971       jam();
30972       return;
30973     }
30974 
30975     for(Uint32 i = 0; i < clogPartFileSize; i++)
30976     {
30977       logPartPtr.i = i;
30978       ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
30979       LogFileRecordPtr logFile;
30980       logFile.i = logPartPtr.p->currentLogfile;
30981       ptrCheckGuard(logFile, clogFileFileSize, logFileRecord);
30982 
30983       LogPosition head = { logFile.p->fileNo, logFile.p->currentMbyte };
30984       LogPosition tail = { logPartPtr.p->logTailFileNo,
30985                            logPartPtr.p->logTailMbyte};
30986       Uint64 mb = free_log(head, tail, logPartPtr.p->noLogFiles, clogFileSize);
30987       Uint64 total = logPartPtr.p->noLogFiles * Uint64(clogFileSize);
30988       signal->theData[0] = NDB_LE_RedoStatus;
30989       signal->theData[1] = logPartPtr.p->logPartNo;
30990       signal->theData[2] = head.m_file_no;
30991       signal->theData[3] = head.m_mbyte;
30992       signal->theData[4] = tail.m_file_no;
30993       signal->theData[5] = tail.m_mbyte;
30994       signal->theData[6] = Uint32(total >> 32);
30995       signal->theData[7] = Uint32(total);
30996       signal->theData[8] = Uint32(mb >> 32);
30997       signal->theData[9] = Uint32(mb);
30998       signal->theData[10] = logPartPtr.p->noLogFiles;
30999       signal->theData[11] = clogFileSize;
31000       sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 12, JBB);
31001     }
31002   }
31003 
31004   if(arg == 2398)
31005   {
31006     jam();
31007 
31008     if (cstartRecReq < SRR_REDO_COMPLETE)
31009     {
31010       jam();
31011       return;
31012     }
31013 
31014     for(Uint32 i = 0; i<clogPartFileSize; i++)
31015     {
31016       logPartPtr.i = i;
31017       ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
31018       LogFileRecordPtr logFile;
31019       logFile.i = logPartPtr.p->currentLogfile;
31020       ptrCheckGuard(logFile, clogFileFileSize, logFileRecord);
31021 
31022       LogPosition head = { logFile.p->fileNo, logFile.p->currentMbyte };
31023       LogPosition tail = { logPartPtr.p->logTailFileNo,
31024                            logPartPtr.p->logTailMbyte};
31025       Uint64 mb = free_log(head, tail, logPartPtr.p->noLogFiles, clogFileSize);
31026       Uint64 total = logPartPtr.p->noLogFiles * Uint64(clogFileSize);
31027       ndbout_c("REDO part: %u HEAD: file: %u mbyte: %u TAIL: file: %u mbyte: %u total: %llu free: %llu (mb)",
31028                logPartPtr.p->logPartNo,
31029                head.m_file_no, head.m_mbyte,
31030                tail.m_file_no, tail.m_mbyte,
31031                total, mb);
31032     }
31033   }
31034 
31035 #if defined VM_TRACE || defined ERROR_INSERT
31036   if (arg == 2396 && signal->length() == 2)
31037       cmaxLogFilesInPageZero_DUMP = dumpState->args[1];
31038 #endif
31039 
31040   if (arg == 2397)
31041   {
31042     /* Send LCP_STATUS_REQ to BACKUP */
31043     LcpStatusReq* req = (LcpStatusReq*) signal->getDataPtr();
31044     req->senderRef = reference();
31045     req->senderData = 0;
31046 
31047     BlockReference backupRef = calcInstanceBlockRef(BACKUP);
31048     sendSignal(backupRef, GSN_LCP_STATUS_REQ, signal,
31049                LcpStatusReq::SignalLength, JBB);
31050   }
31051 
31052   if (arg == 2395)
31053   {
31054     ndbout_c("LCPFragWatchdog : WarnElapsed : %u(ms) MaxElapsed %u(ms) "
31055              ": period millis : %u",
31056              c_lcpFragWatchdog.WarnElapsedWithNoProgressMillis,
31057              c_lcpFragWatchdog.MaxElapsedWithNoProgressMillis,
31058              LCPFragWatchdog::PollingPeriodMillis);
31059     return;
31060   }
31061 
31062   if(arg == 2309)
31063   {
31064     if (ERROR_INSERTED(5086))
31065     {
31066       g_eventLogger->info("LQH instance %u discards DUMP 2309",
31067                           instance());
31068       return;
31069     }
31070 
31071     CRASH_INSERTION(5075);
31072 
31073     progError(__LINE__, NDBD_EXIT_LCP_SCAN_WATCHDOG_FAIL,
31074               "Please report this as a bug. "
31075               "Provide as much info as possible, expecially all the "
31076               "ndb_*_out.log files, Thanks. "
31077               "Shutting down node due to lack of LCP fragment scan progress");
31078   }
31079 
31080 
31081   if (arg == 4003)
31082   {
31083 #ifdef ERROR_INSERT
31084     ndbrequire(m_commitAckMarkerPool.getNoOfFree() ==
31085                m_commitAckMarkerPool.getSize());
31086 #endif
31087   }
31088   if (arg == 5050)
31089   {
31090 #ifdef ERROR_INSERT
31091     SET_ERROR_INSERT_VALUE2(5050, c_master_node_id);
31092 #endif
31093   }
31094 
31095   if (arg == DumpStateOrd::LqhDumpPoolLevels)
31096   {
31097     /* Dump some state info for internal buffers */
31098     if (signal->getLength() == 1)
31099     {
31100       signal->theData[1] = 1;
31101       signal->theData[2] = 0;
31102       signal->theData[3] = 0;
31103       sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 4, JBB);
31104       return;
31105     }
31106     if (signal->getLength() != 4)
31107     {
31108       ndbout_c("DUMP LqhDumpPoolLevels : Bad signal length : %u", signal->getLength());
31109       return;
31110     }
31111 
31112     Uint32 resource = signal->theData[1];
31113     Uint32 position = signal->theData[2];
31114     Uint32 sum = signal->theData[3];
31115     /*const Uint32 MAX_ITER = 200; */
31116 
31117     switch(resource)
31118     {
31119     case 1:
31120     {
31121       /* Must get all in one loop, as we're traversing a dynamic list */
31122       sum = ctcNumFree;
31123       infoEvent("LQH : TcConnection (operation) records in use/total %u/%u (%u bytes each)",
31124                 ctcConnectReserved - sum, ctcConnectReserved, (Uint32) sizeof(TcConnectionrec));
31125       resource++;
31126       position = 0;
31127       sum = 0;
31128       break;
31129     }
31130     case 2:
31131     {
31132       infoEvent("LQH : ScanRecord (Fragment) pool in use/total %u/%u (%u bytes each)",
31133                 c_scanRecordPool.getSize()-
31134                 c_scanRecordPool.getNoOfFree(),
31135                 c_scanRecordPool.getSize(),
31136                 (Uint32) sizeof(ScanRecord));
31137       resource++;
31138       position = 0;
31139       sum = 0;
31140       break;
31141     }
31142     default:
31143       return;
31144     }
31145 
31146     signal->theData[0] = DumpStateOrd::LqhDumpPoolLevels;
31147     signal->theData[1] = resource;
31148     signal->theData[2] = position;
31149     signal->theData[3] = sum;
31150     sendSignal(reference(), GSN_DUMP_STATE_ORD, signal, 4, JBB);
31151     return;
31152   }
31153 
31154   if (arg == DumpStateOrd::LqhReportCopyInfo)
31155   {
31156     jam();
31157     Uint64 duration = 0;
31158     Uint64 rate = 0;
31159 
31160     if (c_fragmentCopyStart == 0)
31161     {
31162       infoEvent("LDM(%u): CopyFrag complete no fragments copied",
31163                 instance());
31164       return;
31165     }
31166     duration = NdbTick_CurrentMillisecond() - c_fragmentCopyStart;
31167 
31168     if (duration == 0)
31169       duration = 1;
31170 
31171     rate = (c_totalBytesCopied * 1000) / duration;
31172 
31173     infoEvent("LDM(%u): CopyFrag complete. %u frags,"
31174               " +%llu/-%llu rows, "
31175               "%llu bytes/%llu ms %llu bytes/s.",
31176               instance(),
31177               c_fragmentsCopied,
31178               c_totalCopyRowsIns,
31179               c_totalCopyRowsDel,
31180               c_totalBytesCopied,
31181               duration,
31182               rate);
31183   }
31184 
31185 
31186   if (arg == 2355)
31187   {
31188     jam();
31189     /* Test clean signal shut-off at node failure */
31190     const Uint32 sigLen = signal->getLength();
31191 
31192     if (sigLen < 2)
31193     {
31194       jam();
31195       return;
31196     }
31197 
31198     const Uint32 nodeId = signal->theData[1];
31199 
31200     if (nodeId == cownNodeid)
31201     {
31202       jam();
31203       if (sigLen == 2)
31204       {
31205         jam();
31206         /* Initial request, wait a moment */
31207         sendSignalWithDelay(reference(),
31208                             GSN_DUMP_STATE_ORD,
31209                             signal,
31210                             200,
31211                             3);
31212         return;
31213       }
31214 
31215       jam();
31216 
31217       /* Goodbye */
31218       progError(__LINE__, NDBD_EXIT_ERROR_INSERT, __FILE__);
31219       return;
31220     }
31221     else
31222     {
31223       if (sigLen == 2)
31224       {
31225         /* Send a harmless signal to our counterpart on the node */
31226 
31227         BlockReference luckyRecipient = numberToRef(DBLQH, instance(), nodeId);
31228 
31229         g_eventLogger->info("LQH %u about to send slow signal to %u",
31230                             instance(), nodeId);
31231 
31232 #ifdef ERROR_INSERT
31233         setDelayedPrepare();
31234 #endif
31235 
31236         sendSignal(luckyRecipient,
31237                    GSN_DUMP_STATE_ORD,
31238                    signal,
31239                    3,
31240                    JBB);
31241 
31242         /* DelayedPrepare cancelled now */
31243         /* Do it again, unless the node has failed */
31244         if (getNodeInfo(nodeId).m_connected)
31245         {
31246           sendSignal(reference(),
31247                      GSN_DUMP_STATE_ORD,
31248                      signal,
31249                      2,
31250                      JBB);
31251         }
31252         else
31253         {
31254           g_eventLogger->info("LQH %u.  DUMP 2355 Node %u not connected anymore",
31255                               instance(), nodeId);
31256         }
31257       }
31258       else
31259       {
31260         jam();
31261         /* 'Harmless' longer signal */
31262       }
31263     }
31264     return;
31265   }
31266 }//Dblqh::execDUMP_STATE_ORD()
31267 
get_redo_stats(Uint64 & usage_in_mbytes,Uint64 & size_in_mbytes,Uint64 & written_since_last_in_bytes,Uint64 & update_size,Uint64 & insert_size,Uint64 & delete_size)31268 void Dblqh::get_redo_stats(Uint64 &usage_in_mbytes,
31269                            Uint64 &size_in_mbytes,
31270                            Uint64 &written_since_last_in_bytes,
31271                            Uint64& update_size,
31272                            Uint64& insert_size,
31273                            Uint64& delete_size)
31274 {
31275   /**
31276    * This method assumes that all log parts have the same size.
31277    * It reports the total written number of bytes in all parts.
31278    * It reports the size of one part and it reports the usage
31279    * level on the part with most Mbytes used.
31280    */
31281   size_in_mbytes = 0;
31282   usage_in_mbytes = 0;
31283   written_since_last_in_bytes = 0;
31284   update_size = m_update_size;
31285   insert_size = m_insert_size;
31286   delete_size = m_delete_size;
31287   for (Uint32 logpart = 0;
31288        logpart < clogPartFileSize;
31289        logpart++)
31290   {
31291     jam();
31292     logPartPtr.i = logpart;
31293     ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
31294 
31295     Uint64 total_mbyte = Uint64(logPartPtr.p->noLogFiles) *
31296                            Uint64(clogFileSize);
31297     jamLine(total_mbyte);
31298     size_in_mbytes = total_mbyte;
31299 
31300     LogFileRecordPtr logFilePtr;
31301     logFilePtr.i = logPartPtr.p->currentLogfile;
31302     ptrCheckGuard(logFilePtr, clogFileFileSize, logFileRecord);
31303 
31304     LogPosition head = { logFilePtr.p->fileNo, logFilePtr.p->currentMbyte };
31305     LogPosition tail = { logPartPtr.p->logTailFileNo,
31306                        logPartPtr.p->logTailMbyte };
31307     Uint64 mbyte_free = free_log(head,
31308                                  tail,
31309                                  logPartPtr.p->noLogFiles,
31310                                  clogFileSize);
31311     ndbrequire(total_mbyte >= mbyte_free);
31312     Uint64 mbyte_used = total_mbyte - mbyte_free;
31313     Uint64 last_written = logPartPtr.p->m_last_total_written_words;
31314     Uint64 current_written = logPartPtr.p->m_total_written_words;
31315     logPartPtr.p->m_last_total_written_words = current_written;
31316     Uint64 written_in_bytes = 4 * (current_written - last_written);
31317     written_since_last_in_bytes += written_in_bytes;
31318     if (mbyte_used > usage_in_mbytes)
31319     {
31320       jam();
31321       usage_in_mbytes = mbyte_used;
31322     }
31323   }
31324 }
31325 
execDBINFO_SCANREQ(Signal * signal)31326 void Dblqh::execDBINFO_SCANREQ(Signal *signal)
31327 {
31328   DbinfoScanReq req= *(DbinfoScanReq*)signal->theData;
31329   const Ndbinfo::ScanCursor* cursor =
31330     CAST_CONSTPTR(Ndbinfo::ScanCursor, DbinfoScan::getCursorPtr(&req));
31331   Ndbinfo::Ratelimit rl;
31332 
31333   jamEntry();
31334 
31335   switch(req.tableId){
31336   case Ndbinfo::LOGSPACES_TABLEID:
31337   {
31338     Uint32 logpart = cursor->data[0];
31339     while(logpart < clogPartFileSize)
31340     {
31341       jam();
31342 
31343       logPartPtr.i = logpart;
31344       ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
31345 
31346       LogFileRecordPtr logFile;
31347       logFile.i = logPartPtr.p->currentLogfile;
31348       ptrCheckGuard(logFile, clogFileFileSize, logFileRecord);
31349 
31350       LogPosition head = { logFile.p->fileNo, logFile.p->currentMbyte };
31351       LogPosition tail = { logPartPtr.p->logTailFileNo,
31352                            logPartPtr.p->logTailMbyte};
31353       Uint64 mb = free_log(head, tail, logPartPtr.p->noLogFiles, clogFileSize);
31354       Uint64 total = logPartPtr.p->noLogFiles * Uint64(clogFileSize);
31355       Uint64 high = 0; // TODO
31356 
31357       Ndbinfo::Row row(signal, req);
31358       row.write_uint32(getOwnNodeId());
31359       row.write_uint32(0);              // log type, 0 = REDO
31360       row.write_uint32(0);              // log id, always 0 in LQH
31361       row.write_uint32(logPartPtr.p->logPartNo); // log part
31362 
31363       row.write_uint64(total*1024*1024);          // total allocated
31364       row.write_uint64((total-mb)*1024*1024);     // currently in use
31365       row.write_uint64(high*1024*1024);           // in use high water mark
31366       ndbinfo_send_row(signal, req, row, rl);
31367       logpart++;
31368       if (rl.need_break(req))
31369       {
31370         jam();
31371         ndbinfo_send_scan_break(signal, req, rl, logpart);
31372         return;
31373       }
31374     }
31375     break;
31376   }
31377 
31378   case Ndbinfo::LOGBUFFERS_TABLEID:
31379   {
31380     const size_t entry_size = sizeof(LogPageRecord);
31381     const Uint64 free = cnoOfLogPages;
31382     const Uint64 total = clogPageCount;
31383     const Uint64 high = 0; // TODO
31384 
31385     Ndbinfo::Row row(signal, req);
31386     row.write_uint32(getOwnNodeId());
31387     row.write_uint32(Ndbinfo::REDO);  // log type = REDO
31388     row.write_uint32(0);              // log id, always 0 in LQH
31389     row.write_uint32(instance());     // log part, instance for ndbmtd
31390 
31391     row.write_uint64(total*entry_size);        // total allocated
31392     row.write_uint64((total-free)*entry_size); // currently in use
31393     row.write_uint64(high*entry_size);         // in use high water mark
31394     ndbinfo_send_row(signal, req, row, rl);
31395 
31396     break;
31397   }
31398 
31399   case Ndbinfo::COUNTERS_TABLEID:
31400   {
31401     Ndbinfo::counter_entry counters[] = {
31402       { Ndbinfo::OPERATIONS_COUNTER,              c_Counters.operations },
31403       { Ndbinfo::LQHKEY_OVERLOAD,                 c_keyOverloads },
31404       { Ndbinfo::LQHKEY_OVERLOAD_TC,              c_keyOverloadsTcNode },
31405       { Ndbinfo::LQHKEY_OVERLOAD_READER,          c_keyOverloadsReaderApi },
31406       { Ndbinfo::LQHKEY_OVERLOAD_NODE_PEER,       c_keyOverloadsPeerNode },
31407       { Ndbinfo::LQHKEY_OVERLOAD_SUBSCRIBER,      c_keyOverloadsSubscriber },
31408       { Ndbinfo::LQHSCAN_SLOWDOWN,                c_scanSlowDowns }
31409     };
31410     const size_t num_counters = sizeof(counters) / sizeof(counters[0]);
31411 
31412     Uint32 i = cursor->data[0];
31413     BlockNumber bn = blockToMain(number());
31414     while(i < num_counters)
31415     {
31416       jam();
31417       Ndbinfo::Row row(signal, req);
31418       row.write_uint32(getOwnNodeId());
31419       row.write_uint32(bn);           // block number
31420       row.write_uint32(instance());   // block instance
31421       row.write_uint32(counters[i].id);
31422 
31423       row.write_uint64(counters[i].val);
31424       ndbinfo_send_row(signal, req, row, rl);
31425       i++;
31426       if (rl.need_break(req))
31427       {
31428         jam();
31429         ndbinfo_send_scan_break(signal, req, rl, i);
31430         return;
31431       }
31432     }
31433     break;
31434   }
31435   case Ndbinfo::OPERATIONS_TABLEID:{
31436     Uint32 bucket = cursor->data[0];
31437 
31438     while (true)
31439     {
31440       if (rl.need_break(req))
31441       {
31442         jam();
31443         ndbinfo_send_scan_break(signal, req, rl, bucket);
31444         return;
31445       }
31446 
31447       for (; bucket < NDB_ARRAY_SIZE(ctransidHash); bucket++)
31448       {
31449         if (ctransidHash[bucket] != RNIL)
31450           break;
31451       }
31452 
31453       if (bucket == NDB_ARRAY_SIZE(ctransidHash))
31454       {
31455         break;
31456       }
31457 
31458       TcConnectionrecPtr tcPtr;
31459       tcPtr.i = ctransidHash[bucket];
31460       while (tcPtr.i != RNIL)
31461       {
31462         jam();
31463         ndbrequire(tcConnect_pool.getValidPtr(tcPtr));
31464         Ndbinfo::Row row(signal, req);
31465         ndbinfo_write_op(row, tcPtr);
31466         ndbinfo_send_row(signal, req, row, rl);
31467         tcPtr.i = tcPtr.p->nextHashRec;
31468       }
31469       bucket++;
31470     }
31471     break;
31472   }
31473   case Ndbinfo::FRAG_OPERATIONS_TABLEID:
31474   {
31475     Uint32 tableid = cursor->data[0];
31476 
31477     for (;tableid < ctabrecFileSize; tableid++)
31478     {
31479       TablerecPtr tabPtr;
31480       tabPtr.i = tableid;
31481       ptrAss(tabPtr, tablerec);
31482       if (tabPtr.p->tableStatus != Tablerec::NOT_DEFINED)
31483       {
31484         jam();
31485         // Loop over all fragments for this table.
31486         for (Uint32 f = 0; f<NDB_ARRAY_SIZE(tabPtr.p->fragrec); f++)
31487         {
31488           if (tabPtr.p->fragrec[f] != RNIL)
31489           {
31490             jam();
31491             Fragrecord* const frag =
31492               c_fragment_pool.getPtr(tabPtr.p->fragrec[f]);
31493 
31494             /* Get fragment's stats from TUP */
31495             const Dbtup::FragStats fs
31496                 = c_tup->get_frag_stats(frag->tupFragptr);
31497 
31498             const Uint64 commitCount = fs.committedChanges;
31499 
31500             Fragrecord::UsageStat& useStat = frag->m_useStat;
31501 
31502             Ndbinfo::Row row(signal, req);
31503             row.write_uint32(getOwnNodeId());
31504             row.write_uint32(instance());
31505             row.write_uint32(tableid);
31506             row.write_uint32(frag->fragId);
31507             row.write_uint64(useStat.m_readKeyReqCount);
31508             row.write_uint64(useStat.m_insKeyReqCount);
31509             row.write_uint64(useStat.m_updKeyReqCount);
31510             row.write_uint64(useStat.m_writeKeyReqCount);
31511             row.write_uint64(useStat.m_delKeyReqCount);
31512             row.write_uint64(useStat.m_keyRefCount);
31513             row.write_uint64(useStat.m_keyReqAttrWords * sizeof(Uint32));
31514             row.write_uint64(useStat.m_keyReqKeyWords * sizeof(Uint32));
31515             row.write_uint64(useStat.m_keyProgramWords * sizeof(Uint32));
31516             row.write_uint64(useStat.m_keyInstructionCount);
31517             row.write_uint64(useStat.m_keyReqWordsReturned * sizeof(Uint32));
31518             row.write_uint64(useStat.m_scanFragReqCount);
31519             row.write_uint64(useStat.m_scanRowsExamined);
31520             row.write_uint64(useStat.m_scanRowsReturned);
31521             row.write_uint64(useStat.m_scanWordsReturned * sizeof(Uint32));
31522             row.write_uint64(useStat.m_scanProgramWords * sizeof(Uint32));
31523             row.write_uint64(useStat.m_scanBoundWords * sizeof(Uint32));
31524             row.write_uint64(useStat.m_scanInstructionCount);
31525             row.write_uint64(useStat.m_queuedScanCount);
31526 
31527             row.write_uint32(Local_ScanRecord_list
31528                              (c_scanRecordPool, frag->m_activeScans).getCount());
31529 
31530             row.write_uint32(Local_ScanRecord_fifo
31531                              (c_scanRecordPool, frag->m_queuedScans).getCount());
31532 
31533             row.write_uint32(Local_ScanRecord_fifo
31534                              (c_scanRecordPool, frag->m_queuedTupScans).getCount());
31535 
31536             row.write_uint32(Local_ScanRecord_fifo
31537                              (c_scanRecordPool,
31538                               frag->m_queuedAccScans).getCount());
31539 
31540             row.write_uint64(commitCount);
31541             ndbinfo_send_row(signal, req, row, rl);
31542           }
31543         }
31544       }
31545 
31546       /*
31547         If a break is needed, break on a table bondary, as we use the table id
31548         as a cursor.
31549       */
31550       if (rl.need_break(req))
31551       {
31552         jam();
31553         ndbinfo_send_scan_break(signal, req, rl, tableid + 1);
31554         return;
31555       }
31556     }
31557 
31558     break;
31559   }
31560   case Ndbinfo::FRAG_MEM_USE_TABLEID:
31561   {
31562     /*
31563       Loop over all tables. cursor->data[0] shows where this batch should start.
31564      */
31565     for (Uint32 tableid = cursor->data[0]; tableid < ctabrecFileSize; tableid++)
31566     {
31567       TablerecPtr tabPtr;
31568       tabPtr.i = tableid;
31569       ptrAss(tabPtr, tablerec);
31570       if (tabPtr.p->tableStatus != Tablerec::NOT_DEFINED)
31571       {
31572         jam();
31573         // Loop over the fragments of this table.
31574         for (Uint32 fragNo = 0; fragNo<NDB_ARRAY_SIZE(tabPtr.p->fragrec);
31575              fragNo++)
31576         {
31577           FragrecordPtr myFragPtr;
31578           if ((myFragPtr.i = tabPtr.p->fragrec[fragNo]) != RNIL)
31579           {
31580             jam();
31581             c_fragment_pool.getPtr(myFragPtr);
31582 
31583             /* Get fragment's stats from TUP */
31584             const Dbtup::FragStats fs
31585               = c_tup->get_frag_stats(myFragPtr.p->tupFragptr);
31586 
31587             Ndbinfo::Row row(signal, req);
31588             row.write_uint32(getOwnNodeId());
31589             row.write_uint32(instance());
31590             row.write_uint32(tableid);
31591             row.write_uint32(myFragPtr.p->fragId);
31592 
31593             Uint64 hashMapBytes = 0;
31594             Uint32 accL2PMapBytes = 0;
31595 
31596             if (myFragPtr.p->accFragptr == RNIL)
31597             {
31598               jam();
31599               ndbassert(DictTabInfo::isOrderedIndex(tabPtr.p->tableType));
31600             }
31601             else
31602             {
31603               jam();
31604               accL2PMapBytes =
31605                 c_acc->getL2PMapAllocBytes(myFragPtr.p->accFragptr);
31606               hashMapBytes = c_acc->getLinHashByteSize(myFragPtr.p->accFragptr);
31607             }
31608 
31609             const Uint64 fixedSlotsAvailable =
31610               fs.fixedMemoryAllocPages * fs.fixedSlotsPerPage;
31611             ndbassert(fs.fixedElemCount <= fixedSlotsAvailable);
31612 
31613             const Uint64 fixedFreeSlots
31614               = fixedSlotsAvailable - fs.fixedElemCount;
31615 
31616             const Uint64 fixedFreeBytes
31617               = fixedFreeSlots * fs.fixedRecordBytes;
31618 
31619             row.write_uint64(fs.committedRowCount);
31620             row.write_uint64(fs.fixedMemoryAllocPages * fs.pageSizeBytes);
31621             row.write_uint64(fixedFreeBytes);
31622             row.write_uint64(fs.fixedElemCount);
31623             row.write_uint32(fs.fixedRecordBytes);
31624             row.write_uint64(fs.varMemoryAllocPages * fs.pageSizeBytes);
31625             row.write_uint64(fs.varMemoryFreeBytes);
31626             row.write_uint64(fs.varElemCount);
31627             row.write_uint64(fs.logToPhysMapAllocBytes);
31628             row.write_uint64(accL2PMapBytes);
31629             row.write_uint64(hashMapBytes);
31630 
31631             ndbinfo_send_row(signal, req, row, rl);
31632           }
31633         }
31634       }
31635 
31636       /*
31637         If necessary, make a break before starting on the next table. Since
31638         we use table ids to tell where to start the next batch, we cannot
31639         make a break before all fragments of the current table have been sent.
31640        */
31641       if (rl.need_break(req))
31642       {
31643         jam();
31644         ndbinfo_send_scan_break(signal, req, rl, tableid + 1);
31645         return;
31646       }
31647     }
31648 
31649     break;
31650   }
31651   case Ndbinfo::POOLS_TABLEID:
31652   {
31653     Ndbinfo::pool_entry pools[] =
31654     {
31655       { "LQH Operation Record",
31656         tcConnect_pool.getUsed(),
31657         tcConnect_pool.getSize(),
31658         tcConnect_pool.getEntrySize(),
31659         tcConnect_pool.getUsedHi(),
31660         { 0, 0, 0, 0},
31661         RT_DBLQH_TC_CONNECT},
31662       { "LQH scan Record",
31663         c_scanRecordPool.getUsed(),
31664         c_scanRecordPool.getSize(),
31665         c_scanRecordPool.getEntrySize(),
31666         c_scanRecordPool.getUsedHi(),
31667         { 0, 0, 0, 0},
31668         RT_DBLQH_SCAN_RECORD},
31669       { "Commit ACK Marker",
31670         m_commitAckMarkerPool.getUsed(),
31671         m_commitAckMarkerPool.getSize(),
31672         m_commitAckMarkerPool.getEntrySize(),
31673         m_commitAckMarkerPool.getUsedHi(),
31674         { CFG_DB_NO_LOCAL_OPS, CFG_DB_NO_OPS,0,0 },
31675         RT_DBLQH_COMMIT_ACK_MARKER},
31676       { NULL, 0,0,0,0,{0,0,0,0},0 }
31677     };
31678 
31679     const size_t num_config_params =
31680       sizeof(pools[0].config_params) / sizeof(pools[0].config_params[0]);
31681     Uint32 pool = cursor->data[0];
31682     BlockNumber bn = blockToMain(number());
31683     while(pools[pool].poolname)
31684     {
31685       jam();
31686       Ndbinfo::Row row(signal, req);
31687       row.write_uint32(getOwnNodeId());
31688       row.write_uint32(bn);           // block number
31689       row.write_uint32(instance());   // block instance
31690       row.write_string(pools[pool].poolname);
31691 
31692       row.write_uint64(pools[pool].used);
31693       row.write_uint64(pools[pool].total);
31694       row.write_uint64(pools[pool].used_hi);
31695       row.write_uint64(pools[pool].entry_size);
31696       for (size_t i = 0; i < num_config_params; i++)
31697         row.write_uint32(pools[pool].config_params[i]);
31698       row.write_uint32(GET_RG(pools[pool].record_type));
31699       row.write_uint32(GET_TID(pools[pool].record_type));
31700       ndbinfo_send_row(signal, req, row, rl);
31701       pool++;
31702       if (rl.need_break(req))
31703       {
31704         jam();
31705         ndbinfo_send_scan_break(signal, req, rl, pool);
31706         return;
31707       }
31708     }
31709     break;
31710   }
31711 
31712   default:
31713     break;
31714   }
31715 
31716   ndbinfo_send_scan_conf(signal, req, rl);
31717 }
31718 
31719 void
ndbinfo_write_op(Ndbinfo::Row & row,TcConnectionrecPtr tcPtr)31720 Dblqh::ndbinfo_write_op(Ndbinfo::Row & row, TcConnectionrecPtr tcPtr)
31721 {
31722   row.write_uint32(getOwnNodeId());
31723   row.write_uint32(instance());          // block instance
31724   row.write_uint32(tcPtr.i);             // objid
31725   row.write_uint32(tcPtr.p->tcBlockref); // tcref
31726   row.write_uint32(tcPtr.p->applRef);    // apiref
31727 
31728   row.write_uint32(tcPtr.p->transid[0]);
31729   row.write_uint32(tcPtr.p->transid[1]);
31730   row.write_uint32(tcPtr.p->tableref);
31731   row.write_uint32(tcPtr.p->fragmentid);
31732 
31733   if (tcPtr.p->tcScanRec != RNIL)
31734   {
31735     ScanRecordPtr sp;
31736     sp.i = tcPtr.p->tcScanRec;
31737     c_scanRecordPool.getPtr(sp);
31738 
31739     Uint32 op = NDB_INFO_OP_SCAN_UNKNOWN;
31740     if (sp.p->scanLockMode)
31741       op = NDB_INFO_OP_SCAN_EX;
31742     else if (sp.p->scanLockHold)
31743       op = NDB_INFO_OP_SCAN_SH;
31744     else
31745       op = NDB_INFO_OP_SCAN;
31746 
31747     row.write_uint32(op);
31748     row.write_uint32(sp.p->scanState);
31749     row.write_uint32(0);
31750   }
31751   else
31752   {
31753     Uint32 op = NDB_INFO_OP_UNKNOWN;
31754     switch(tcPtr.p->operation){
31755     case ZREAD:
31756       if (tcPtr.p->lockType)
31757 	op = NDB_INFO_OP_READ_EX;
31758       else if (!tcPtr.p->dirtyOp)
31759 	op = NDB_INFO_OP_READ_SH;
31760       else
31761         op = NDB_INFO_OP_READ;
31762       break;
31763     case ZINSERT:
31764       op = NDB_INFO_OP_INSERT;
31765       break;
31766     case ZUPDATE:
31767       op = NDB_INFO_OP_UPDATE;
31768       break;
31769     case ZDELETE:
31770       op = NDB_INFO_OP_DELETE;
31771       break;
31772     case ZWRITE:
31773       op = NDB_INFO_OP_WRITE;
31774       break;
31775     case ZUNLOCK:
31776       op = NDB_INFO_OP_UNLOCK;
31777       break;
31778     case ZREFRESH:
31779       op = NDB_INFO_OP_REFRESH;
31780       break;
31781     }
31782     row.write_uint32(op);
31783     row.write_uint32(tcPtr.p->transactionState);
31784     row.write_uint32(0);
31785   }
31786 }
31787 
31788 
31789 void
startLcpFragWatchdog(Signal * signal)31790 Dblqh::startLcpFragWatchdog(Signal* signal)
31791 {
31792   jam();
31793   DEB_LCP(("(%u)startLcpFragWatchdog", instance()));
31794   /* Must not already be running */
31795   /* Thread could still be active from a previous run */
31796   ndbrequire(c_lcpFragWatchdog.scan_running == false);
31797   c_lcpFragWatchdog.scan_running = true;
31798   c_lcpFragWatchdog.elapsedNoProgressMillis = 0;
31799   c_lcpFragWatchdog.lastChecked = NdbTick_getCurrentTicks();
31800 
31801   /* If thread is not already active, start it */
31802   if (! c_lcpFragWatchdog.thread_active)
31803   {
31804     jam();
31805     invokeLcpFragWatchdogThread(signal);
31806   }
31807 
31808   ndbrequire(c_lcpFragWatchdog.thread_active == true);
31809 }
31810 
31811 void
invokeLcpFragWatchdogThread(Signal * signal)31812 Dblqh::invokeLcpFragWatchdogThread(Signal* signal)
31813 {
31814   jam();
31815   ndbrequire(c_lcpFragWatchdog.scan_running);
31816 
31817   c_lcpFragWatchdog.thread_active = true;
31818 
31819   signal->getDataPtrSend()[0] = ZLCP_FRAG_WATCHDOG;
31820   sendSignalWithDelay(cownref, GSN_CONTINUEB, signal,
31821                       LCPFragWatchdog::PollingPeriodMillis, 1);
31822 
31823   LcpStatusReq* req = (LcpStatusReq*)signal->getDataPtr();
31824   req->senderRef = cownref;
31825   req->senderData = 1;
31826   BlockReference backupRef = calcInstanceBlockRef(BACKUP);
31827   sendSignal(backupRef, GSN_LCP_STATUS_REQ, signal,
31828              LcpStatusReq::SignalLength, JBB);
31829 }
31830 
31831 void
execLCP_STATUS_CONF(Signal * signal)31832 Dblqh::execLCP_STATUS_CONF(Signal* signal)
31833 {
31834   jamEntry();
31835   LcpStatusConf* conf = (LcpStatusConf*) signal->getDataPtr();
31836 
31837   if (conf->senderData == 0)
31838   {
31839     /* DUMP STATE variant */
31840     ndbout_c("Received LCP_STATUS_CONF from %x", conf->senderRef);
31841     ndbout_c("  Status = %u, Table = %u, Frag = %u",
31842              conf->lcpState,
31843              conf->tableId,
31844              conf->fragId);
31845     ndbout_c("  Completion State %llu",
31846              (((Uint64)conf->completionStateHi) << 32) + conf->completionStateLo);
31847     ndbout_c("  Lcp done rows %llu, done bytes %llu",
31848              (((Uint64)conf->lcpDoneRowsHi) << 32) + conf->lcpDoneRowsLo,
31849              (((Uint64)conf->lcpDoneBytesHi) << 32) + conf->lcpDoneBytesLo);
31850     ndbout_c(" Lcp scanned %u pages", conf->lcpScannedPages);
31851   }
31852 
31853   /* We can ignore the LCP status as if it's complete then we should
31854    * promptly stop watching
31855    */
31856   c_lcpFragWatchdog.handleLcpStatusRep((LcpStatusConf::LcpState)conf->lcpState,
31857                                        conf->tableId,
31858                                        conf->fragId,
31859                                        (((Uint64)conf->completionStateHi) << 32) +
31860                                        conf->completionStateLo,
31861                                        conf->lcpScannedPages);
31862 }
31863 
31864 void
execLCP_STATUS_REF(Signal * signal)31865 Dblqh::execLCP_STATUS_REF(Signal* signal)
31866 {
31867   jamEntry();
31868   LcpStatusRef* ref = (LcpStatusRef*) signal->getDataPtr();
31869 
31870   ndbout_c("Received LCP_STATUS_REF from %x, senderData = %u with error code %u",
31871            ref->senderRef, ref->senderData, ref->error);
31872 
31873   ndbabort();
31874 }
31875 
31876 void
reset()31877 Dblqh::LCPFragWatchdog::reset()
31878 {
31879   jamBlock(block);
31880   scan_running = false;
31881   lcpState = LcpStatusConf::LCP_IDLE;
31882   tableId = ~Uint32(0);
31883   fragId = ~Uint32(0);
31884   completionStatus = ~Uint64(0);
31885   lcpScannedPages = 0;
31886   elapsedNoProgressMillis = 0;
31887   NdbTick_Invalidate(&lastChecked);
31888 }
31889 
31890 void
handleLcpStatusRep(LcpStatusConf::LcpState repLcpState,Uint32 repTableId,Uint32 repFragId,Uint64 repCompletionStatus,Uint32 repLcpScannedPages)31891 Dblqh::LCPFragWatchdog::handleLcpStatusRep(LcpStatusConf::LcpState repLcpState,
31892                                            Uint32 repTableId,
31893                                            Uint32 repFragId,
31894                                            Uint64 repCompletionStatus,
31895                                            Uint32 repLcpScannedPages)
31896 {
31897   jamBlock(block);
31898   if (scan_running)
31899   {
31900     jamBlock(block);
31901     if ((repCompletionStatus != completionStatus) ||
31902         (repFragId != fragId) ||
31903         (repTableId != tableId) ||
31904         (repLcpState != lcpState) ||
31905         (repLcpScannedPages != lcpScannedPages))
31906     {
31907       jamBlock(block);
31908       /* Something moved since last time, reset
31909        * progress monitor and data.
31910        */
31911       elapsedNoProgressMillis = 0;
31912       lastChecked = NdbTick_getCurrentTicks();
31913       lcpState = repLcpState;
31914       tableId = repTableId;
31915       fragId = repFragId;
31916       completionStatus = repCompletionStatus;
31917       lcpScannedPages = repLcpScannedPages;
31918     }
31919   }
31920 }
31921 
31922 
31923 const char*
lcpStateString(LcpStatusConf::LcpState lcpState)31924 Dblqh::lcpStateString(LcpStatusConf::LcpState lcpState)
31925 {
31926   switch (lcpState)
31927   {
31928     case LcpStatusConf::LCP_IDLE:
31929       return "LCP_IDLE";
31930     case LcpStatusConf::LCP_PREPARED:
31931       return "LCP_PREPARED";
31932     case LcpStatusConf::LCP_SCANNING:
31933       return "LCP_SCANNING";
31934     case LcpStatusConf::LCP_SCANNED:
31935       return "LCP_SCANNED";
31936     case LcpStatusConf::LCP_PREPARE_READ_CTL_FILES:
31937       return "LCP_PREPARE_READ_CTL_FILES";
31938     case LcpStatusConf::LCP_PREPARE_OPEN_DATA_FILE:
31939       return "LCP_PREPARE_OPEN_DATA_FILE";
31940     case LcpStatusConf::LCP_PREPARE_READ_TABLE_DESC:
31941       return "LCP_PREPARE_READ_TABLE_DESC";
31942     case LcpStatusConf::LCP_PREPARE_ABORTING:
31943       return "LCP_PREPARE_ABORTING";
31944     case LcpStatusConf::LCP_WAIT_END_LCP:
31945       return "LCP_WAIT_END_LCP";
31946     case LcpStatusConf::LCP_PREPARE_WAIT_DROP_CASE:
31947       return "LCP_PREPARE_WAIT_DROP_CASE";
31948     case LcpStatusConf::LCP_WAIT_SYNC_DISK:
31949       return "LCP_WAIT_SYNC_DISK";
31950     case LcpStatusConf::LCP_WAIT_SYNC_EXTENT:
31951       return "LCP_WAIT_SYNC_EXTENT";
31952     case LcpStatusConf::LCP_WAIT_WRITE_CTL_FILE:
31953       return "LCP_WAIT_WRITE_CTL_FILE";
31954     case LcpStatusConf::LCP_WAIT_CLOSE_EMPTY:
31955       return "LCP_WAIT_CLOSE_EMPTY";
31956     case LcpStatusConf::LCP_WAIT_FINAL_SYNC_EXTENT:
31957       return "LCP_WAIT_FINAL_SYNC_EXTENT";
31958     default:
31959       return "LCP_UNKNOWN_STATE";
31960   }
31961   return NULL;
31962 }
31963 
31964 void
execINFO_GCP_STOP_TIMER(Signal * signal)31965 Dblqh::execINFO_GCP_STOP_TIMER(Signal *signal)
31966 {
31967   c_gcp_stop_timer = signal->theData[0];
31968 }
31969 
31970 /**
31971  * checkLcpFragWatchdog
31972  *
31973  * This method implements the LCP Frag watchdog 'thread', periodically
31974  * checking for progress in the current LCP fragment scan
31975  */
31976 void
checkLcpFragWatchdog(Signal * signal)31977 Dblqh::checkLcpFragWatchdog(Signal* signal)
31978 {
31979   jam();
31980   ndbrequire(c_lcpFragWatchdog.thread_active == true);
31981 
31982   if (!c_lcpFragWatchdog.scan_running)
31983   {
31984     jam();
31985     /* We've been asked to stop */
31986     c_lcpFragWatchdog.thread_active = false;
31987     return;
31988   }
31989 
31990   // Calculate real time elapsed since last check
31991   const NDB_TICKS now = NdbTick_getCurrentTicks();
31992   const NDB_TICKS last = c_lcpFragWatchdog.lastChecked;
31993   c_lcpFragWatchdog.lastChecked = now;
31994 
31995   /**
31996    * Avoid false LCP failures if timers misbehaves,
31997    * (timer is non-monotonic, or OS/VM bugs which there are some of)
31998    * or we have scheduler problems due to being CPU starved:
31999    *
32000    * - If we overslept 'PollingPeriodMillis', (CPU starved?) or
32001    *   timer leapt forward for other reasons (Adjusted, or OS-bug)
32002    *   we never calculate an elapsed periode of more than
32003    *   the requested sleep 'PollingPeriodMillis'
32004    * - Else we add the real measured elapsed time to total.
32005    *   (Timers may fire prior to requested 'PollingPeriodMillis')
32006    *
32007    * Note: If timer for some reason ticked backwards such that
32008    *       'now < last', NdbTick_Elapsed() will return '0' such
32009    *       that this is 'absorbed'
32010    */
32011   Uint32 elapsed = (Uint32)NdbTick_Elapsed(last,now).milliSec();
32012   if (elapsed > LCPFragWatchdog::PollingPeriodMillis)
32013     elapsed = LCPFragWatchdog::PollingPeriodMillis;
32014 
32015   c_lcpFragWatchdog.elapsedNoProgressMillis += elapsed;
32016 
32017   /* Check how long we've been waiting for progress on this scan */
32018   if ((c_lcpFragWatchdog.WarnElapsedWithNoProgressMillis > 0) &&
32019       ((c_lcpFragWatchdog.elapsedNoProgressMillis >=
32020         c_lcpFragWatchdog.WarnElapsedWithNoProgressMillis)))
32021   {
32022     jam();
32023     const char* completionStatusString =
32024       (c_lcpFragWatchdog.lcpState == LcpStatusConf::LCP_SCANNING?
32025        "rows completed":
32026        "bytes remaining.");
32027 
32028     warningEvent("LCP Frag watchdog : No progress on table %u, frag %u for %u s."
32029                  "  %llu %s, state: %s",
32030                  c_lcpFragWatchdog.tableId,
32031                  c_lcpFragWatchdog.fragId,
32032                  c_lcpFragWatchdog.elapsedNoProgressMillis / 1000,
32033                  c_lcpFragWatchdog.completionStatus,
32034                  completionStatusString,
32035                  lcpStateString(c_lcpFragWatchdog.lcpState));
32036     c_tup->lcp_frag_watchdog_print(c_lcpFragWatchdog.tableId,
32037                                    c_lcpFragWatchdog.fragId);
32038     g_eventLogger->info("LCP Frag watchdog : No progress on table %u,"
32039                         " frag %u for %u s."
32040                         "  %llu %s, state: %s",
32041              c_lcpFragWatchdog.tableId,
32042              c_lcpFragWatchdog.fragId,
32043              c_lcpFragWatchdog.elapsedNoProgressMillis / 1000,
32044              c_lcpFragWatchdog.completionStatus,
32045              completionStatusString,
32046              lcpStateString(c_lcpFragWatchdog.lcpState));
32047 
32048     Uint32 max_no_progress_time =
32049       c_lcpFragWatchdog.MaxElapsedWithNoProgressMillis;
32050 
32051     if ((c_lcpFragWatchdog.lcpState == LcpStatusConf::LCP_WAIT_END_LCP) &&
32052         (max_no_progress_time < (2 * c_gcp_stop_timer)))
32053     {
32054       jam();
32055       max_no_progress_time = 2 * c_gcp_stop_timer;
32056     }
32057     if (c_lcpFragWatchdog.elapsedNoProgressMillis >= max_no_progress_time)
32058     {
32059       jam();
32060       /* Too long with no progress... */
32061 
32062       warningEvent("LCP Frag watchdog : Checkpoint of table %u fragment %u "
32063                    "too slow (no progress for > %u s, state: %s).",
32064                    c_lcpFragWatchdog.tableId,
32065                    c_lcpFragWatchdog.fragId,
32066                    c_lcpFragWatchdog.elapsedNoProgressMillis / 1000,
32067                    lcpStateString(c_lcpFragWatchdog.lcpState));
32068       ndbout_c("LCP Frag watchdog : Checkpoint of table %u fragment %u "
32069                "too slow (no progress for > %u s, state: %s).",
32070                c_lcpFragWatchdog.tableId,
32071                c_lcpFragWatchdog.fragId,
32072                c_lcpFragWatchdog.elapsedNoProgressMillis / 1000,
32073                lcpStateString(c_lcpFragWatchdog.lcpState));
32074 
32075       /**
32076        * Dump some LCP and GCP state for debugging...
32077        * Also dump some states in master node to see if some LCP
32078        * GCP or other protocol stalled.
32079        */
32080       {
32081         DumpStateOrd* ds = (DumpStateOrd*) signal->getDataPtrSend();
32082 
32083         /* DIH : */
32084         ds->args[0] = DumpStateOrd::DihDumpLCPState;
32085         sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 1, JBA);
32086 
32087         ds->args[0] = 7012;
32088         sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 1, JBA);
32089 
32090         ds->args[0] = 7011;
32091         sendSignal(DBDIH_REF, GSN_DUMP_STATE_ORD, signal, 1, JBA);
32092 
32093         /* Get ref to our LDM's Backup instance */
32094         const BlockReference backupRef = calcInstanceBlockRef(BACKUP);
32095 
32096         /* BACKUP : */
32097         ds->args[0] = 23;
32098         sendSignal(backupRef, GSN_DUMP_STATE_ORD, signal, 1, JBA);
32099 
32100         ds->args[0] = 24;
32101         sendSignal(backupRef, GSN_DUMP_STATE_ORD, signal, 1, JBA);
32102 
32103         /* LQH : */
32104         ds->args[0] = DumpStateOrd::LqhDumpLcpState;
32105         sendSignal(cownref, GSN_DUMP_STATE_ORD, signal, 1, JBA);
32106 
32107         /* NDBFS : */
32108         ds->args[0] = DumpStateOrd::NdbfsDumpFileStat;
32109         sendSignal(NDBFS_REF, GSN_DUMP_STATE_ORD, signal, 1, JBA);
32110 
32111         ds->args[0] = DumpStateOrd::NdbfsDumpOpenFiles;
32112         sendSignal(NDBFS_REF, GSN_DUMP_STATE_ORD, signal, 1, JBA);
32113 
32114         ds->args[0] = DumpStateOrd::NdbfsDumpRequests;
32115         sendSignal(NDBFS_REF, GSN_DUMP_STATE_ORD, signal, 1, JBA);
32116 
32117         const Uint32 ShutdownDelayMillis = 5 * 1000;
32118         /* Delay self-execution to give time for dump output */
32119         ds->args[0] = 2309;
32120         sendSignalWithDelay(cownref,
32121                             GSN_DUMP_STATE_ORD,
32122                             signal,
32123                             ShutdownDelayMillis,
32124                             1);
32125 
32126         /**
32127          * Ask other nodes to isolate me in delay + 500 millis if I have
32128          * not failed by then
32129          */
32130         {
32131           IsolateOrd* ord = (IsolateOrd*) signal->theData;
32132           ord->senderRef = reference();
32133           ord->isolateStep = IsolateOrd::IS_REQ;
32134           ord->delayMillis = ShutdownDelayMillis + 500;
32135 
32136           NdbNodeBitmask victims;
32137           victims.set(cownNodeid);
32138 
32139           /* QMGR handles this */
32140           LinearSectionPtr lsptr[3];
32141           lsptr[0].p = (Uint32*)&victims;
32142           lsptr[0].sz = victims.getPackedLengthInWords();
32143           sendSignal(QMGR_REF,
32144                      GSN_ISOLATE_ORD,
32145                      signal,
32146                      IsolateOrd::SignalLength,
32147                      JBA,
32148                      lsptr,
32149                      1);
32150         }
32151       }
32152 
32153       return;
32154     }
32155   }
32156 
32157   invokeLcpFragWatchdogThread(signal);
32158 }
32159 
32160 void
stopLcpFragWatchdog()32161 Dblqh::stopLcpFragWatchdog()
32162 {
32163   jam();
32164   /* Mark watchdog as no longer running,
32165    * If the 'thread' is active then it will
32166    * stop at the next wakeup
32167    */
32168   DEB_LCP(("(%u)stopLcpFragWatchdog", instance()));
32169   ndbrequire(c_lcpFragWatchdog.scan_running);
32170   c_lcpFragWatchdog.reset();
32171 }
32172 
32173 /* **************************************************************** */
32174 /* ---------------------------------------------------------------- */
32175 /* ---------------------- TRIGGER HANDLING ------------------------ */
32176 /* ---------------------------------------------------------------- */
32177 /*                                                                  */
/*      All trigger signals from TRIX are forwarded to TUP          */
32179 /* ---------------------------------------------------------------- */
32180 /* **************************************************************** */
32181 
32182 // Trigger signals
32183 void
execCREATE_TRIG_IMPL_REQ(Signal * signal)32184 Dblqh::execCREATE_TRIG_IMPL_REQ(Signal* signal)
32185 {
32186   jamEntry();
32187 
32188   if (!assembleFragments(signal))
32189   {
32190     jam();
32191     return;
32192   }
32193 
32194   CreateTrigImplReq* req = (CreateTrigImplReq*)signal->getDataPtrSend();
32195   SectionHandle handle(this, signal);
32196   req->senderRef = reference();
32197   BlockReference tupRef = calcInstanceBlockRef(DBTUP);
32198   sendSignal(tupRef, GSN_CREATE_TRIG_IMPL_REQ, signal,
32199              signal->getLength(), JBB, &handle);
32200 }
32201 
32202 void
execCREATE_TRIG_IMPL_CONF(Signal * signal)32203 Dblqh::execCREATE_TRIG_IMPL_CONF(Signal* signal)
32204 {
32205   jamEntry();
32206 
32207   BlockReference dictRef = !isNdbMtLqh() ? DBDICT_REF : DBLQH_REF;
32208   sendSignal(dictRef, GSN_CREATE_TRIG_IMPL_CONF, signal,
32209              CreateTrigImplConf::SignalLength, JBB);
32210 }
32211 
32212 void
execCREATE_TRIG_IMPL_REF(Signal * signal)32213 Dblqh::execCREATE_TRIG_IMPL_REF(Signal* signal)
32214 {
32215   jamEntry();
32216 
32217   BlockReference dictRef = !isNdbMtLqh() ? DBDICT_REF : DBLQH_REF;
32218   sendSignal(dictRef, GSN_CREATE_TRIG_IMPL_REF, signal,
32219              CreateTrigImplRef::SignalLength, JBB);
32220 }
32221 
32222 void
execDROP_TRIG_IMPL_REQ(Signal * signal)32223 Dblqh::execDROP_TRIG_IMPL_REQ(Signal* signal)
32224 {
32225   jamEntry();
32226 
32227   DropTrigImplReq* req = (DropTrigImplReq*)signal->getDataPtrSend();
32228   req->senderRef = reference();
32229   BlockReference tupRef = calcInstanceBlockRef(DBTUP);
32230   sendSignal(tupRef, GSN_DROP_TRIG_IMPL_REQ, signal,
32231              DropTrigImplReq::SignalLength, JBB);
32232 }
32233 
32234 void
execDROP_TRIG_IMPL_CONF(Signal * signal)32235 Dblqh::execDROP_TRIG_IMPL_CONF(Signal* signal)
32236 {
32237   jamEntry();
32238 
32239   BlockReference dictRef = !isNdbMtLqh() ? DBDICT_REF : DBLQH_REF;
32240   sendSignal(dictRef, GSN_DROP_TRIG_IMPL_CONF, signal,
32241              DropTrigImplConf::SignalLength, JBB);
32242 }
32243 
32244 void
execDROP_TRIG_IMPL_REF(Signal * signal)32245 Dblqh::execDROP_TRIG_IMPL_REF(Signal* signal)
32246 {
32247   jamEntry();
32248 
32249   BlockReference dictRef = !isNdbMtLqh() ? DBDICT_REF : DBLQH_REF;
32250   sendSignal(dictRef, GSN_DROP_TRIG_IMPL_REF, signal,
32251              DropTrigImplRef::SignalLength, JBB);
32252 }
32253 
calcPageCheckSum(LogPageRecordPtr logP)32254 Uint32 Dblqh::calcPageCheckSum(LogPageRecordPtr logP){
32255     Uint32 checkSum = 37;
32256 #ifdef VM_TRACE
32257     checkSum = computeXorChecksum(
32258                  logP.p->logPageWord + (ZPOS_CHECKSUM+1),
32259                  ZPAGE_SIZE - (ZPOS_CHECKSUM+1),
32260                  checkSum);
32261 #endif
32262     return checkSum;
32263   }
32264 
32265 #ifdef NDB_DEBUG_FULL
32266 #ifdef ERROR_INSERT
32267 void
sendSignal(Uint32 ref,Uint32 gsn,Signal * signal,Uint32 len,Uint32 prio)32268 TraceLCP::sendSignal(Uint32 ref, Uint32 gsn, Signal* signal,
32269 		     Uint32 len, Uint32 prio)
32270 {
32271   Sig s;
32272   s.type = Sig::Sig_send;
32273   s.header = signal->header;
32274   s.header.theVerId_signalNumber = gsn;
32275   s.header.theReceiversBlockNumber = ref;
32276   s.header.theLength = len;
32277   memcpy(s.theData, signal->theData, 4 * len);
32278   m_signals.push_back(s);
32279   assert(signal->getNoOfSections() == 0);
32280 }
32281 
32282 void
save(Signal * signal)32283 TraceLCP::save(Signal* signal){
32284   Sig s;
32285   s.type = Sig::Sig_save;
32286   s.header = signal->header;
32287   memcpy(s.theData, signal->theData, 4 * signal->getLength());
32288   m_signals.push_back(s);
32289   assert(signal->getNoOfSections() == 0);
32290 }
32291 
32292 void
restore(SimulatedBlock & lqh,Signal * sig)32293 TraceLCP::restore(SimulatedBlock& lqh, Signal* sig){
32294   Uint32 cnt = m_signals.size();
32295   for(Uint32 i = 0; i<cnt; i++){
32296     sig->header = m_signals[i].header;
32297     memcpy(sig->theData, m_signals[i].theData, 4 * sig->getLength());
32298     switch(m_signals[i].type){
32299     case Sig::Sig_send:
32300       lqh.sendSignal(sig->header.theReceiversBlockNumber,
32301 		     sig->header.theVerId_signalNumber,
32302 		     sig,
32303 		     sig->header.theLength,
32304 		     JBB);
32305       break;
32306     case Sig::Sig_save:
32307       lqh.executeFunction(sig->header.theVerId_signalNumber, sig);
32308       break;
32309     }
32310   }
32311   m_signals.clear();
32312 }
32313 #endif
32314 #endif
32315 
writeDbgInfoPageHeader(LogPageRecordPtr logP,Uint32 place,Uint32 pageNo,Uint32 wordWritten)32316 void Dblqh::writeDbgInfoPageHeader(LogPageRecordPtr logP, Uint32 place,
32317                                    Uint32 pageNo, Uint32 wordWritten)
32318 {
32319   logP.p->logPageWord[ZPOS_LOG_TIMER]= logPartPtr.p->logTimer;
32320   logP.p->logPageWord[ZPOS_PREV_PAGE_NO]= logP.p->logPageWord[ZPOS_PAGE_NO];
32321   logP.p->logPageWord[ZPOS_PAGE_I]= logP.i;
32322   logP.p->logPageWord[ZPOS_PLACE_WRITTEN_FROM]= place;
32323   logP.p->logPageWord[ZPOS_PAGE_NO]= pageNo;
32324   logP.p->logPageWord[ZPOS_PAGE_FILE_NO]= logFilePtr.p->fileNo;
32325   logP.p->logPageWord[ZPOS_WORD_WRITTEN]= wordWritten;
32326   logP.p->logPageWord[ZPOS_IN_WRITING]= 1;
32327 }
32328 
initReportStatus(Signal * signal)32329 void Dblqh::initReportStatus(Signal* signal){
32330   m_last_report_time = NdbTick_getCurrentTicks();
32331 }
32332 
checkReportStatus(Signal * signal)32333 void Dblqh::checkReportStatus(Signal* signal){
32334   if (m_startup_report_frequency == 0)
32335     return;
32336 
32337   const NDB_TICKS now = NdbTick_getCurrentTicks();
32338   const Uint64 elapsed = NdbTick_Elapsed(m_last_report_time, now).seconds();
32339   if (elapsed > m_startup_report_frequency)
32340   {
32341     reportStatus(signal);
32342     m_last_report_time = now;
32343   }
32344 }
32345 
reportStatus(Signal * signal)32346 void Dblqh::reportStatus(Signal* signal){
32347   const int signal_length = 6;
32348 
32349   signal->theData[0] = NDB_LE_LogFileInitStatus;
32350   signal->theData[1] = reference();
32351   for (int i = 2; i < signal_length; i++)
32352     signal->theData[i] = 0;
32353   if (getNodeState().startLevel < NodeState::SL_STARTED){
32354     signal->theData[2] = totalLogFiles;
32355     signal->theData[3] = logFileInitDone;
32356     signal->theData[4] = totallogMBytes;
32357     signal->theData[5] = logMBytesInitDone;
32358   }
32359   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, signal_length, JBB);
32360 }
32361 
logfileInitCompleteReport(Signal * signal)32362 void Dblqh::logfileInitCompleteReport(Signal* signal){
32363   const int signal_length = 6;
32364 
32365   signal->theData[0] = NDB_LE_LogFileInitCompStatus;
32366   signal->theData[1] = reference();
32367   signal->theData[2] = totalLogFiles;
32368   signal->theData[3] = logFileInitDone;
32369   signal->theData[4] = totallogMBytes;
32370   signal->theData[5] = logMBytesInitDone;
32371   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, signal_length, JBB);
32372 }
32373 
32374 #ifdef NDBD_TRACENR
32375 void
TRACE_OP_DUMP(const Dblqh::TcConnectionrec * regTcPtr,const char * pos)32376 Dblqh::TRACE_OP_DUMP(const Dblqh::TcConnectionrec* regTcPtr, const char * pos)
32377 {
32378   (* traceopout)
32379     << "[ " << hex << regTcPtr->transid[0]
32380     << " " << hex << regTcPtr->transid[1] << " ] " << dec
32381     << pos
32382     << " " << (Operation_t)regTcPtr->operation
32383     << " " << regTcPtr->tableref
32384     << "(" << regTcPtr->fragmentid << ")"
32385     << "(" << (regTcPtr->seqNoReplica == 0 ? "P" : "B") << ")" ;
32386 
32387   {
32388     (* traceopout) << "key=[" << hex;
32389     if (regTcPtr->keyInfoIVal != RNIL)
32390     {
32391       SectionReader keyInfoReader(regTcPtr->keyInfoIVal,
32392                                   g_sectionSegmentPool);
32393 
32394       Uint32 keyWord;
32395       while (keyInfoReader.getWord(&keyWord))
32396         (* traceopout) << hex << keyWord << " ";
32397     }
32398     (* traceopout) << "] ";
32399   }
32400 
32401   if (regTcPtr->m_use_rowid)
32402     (* traceopout) << " " << regTcPtr->m_row_id;
32403   (* traceopout) << endl;
32404 }
32405 #endif
32406 
32407 Uint32
get_node_status(Uint32 nodeId) const32408 Dblqh::get_node_status(Uint32 nodeId) const
32409 {
32410   HostRecordPtr Thostptr;
32411   Thostptr.i = nodeId;
32412   ptrCheckGuard(Thostptr, chostFileSize, hostRecord);
32413   return Thostptr.p->nodestatus;
32414 }
32415 
32416 #ifndef NO_REDO_PAGE_CACHE
32417 /**
 * Don't cache pages if fewer than 64 pages are free
32419  */
32420 #define MIN_REDO_PAGES_FREE 64
32421 
32422 void
do_evict(RedoPageCache & cache,Ptr<RedoCacheLogPageRecord> pagePtr)32423 Dblqh::do_evict(RedoPageCache& cache, Ptr<RedoCacheLogPageRecord> pagePtr)
32424 {
32425   LogPageRecordPtr save = logPagePtr;
32426   cache.m_lru.remove(pagePtr);
32427   cache.m_hash.remove(pagePtr);
32428   if (0)
32429   ndbout_c("evict part: %u file: %u page: %u cnoOfLogPages: %u",
32430            pagePtr.p->m_part_no,
32431            pagePtr.p->m_file_no,
32432            pagePtr.p->m_page_no,
32433            cnoOfLogPages);
32434 
32435   logPagePtr.i = pagePtr.i;
32436   ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
32437 
32438   Ptr<LogPageRecord> prevPagePtr, nextPagePtr;
32439   prevPagePtr.i = logPagePtr.p->logPageWord[ZPREV_PAGE];
32440   nextPagePtr.i = logPagePtr.p->logPageWord[ZNEXT_PAGE];
32441   if (prevPagePtr.i != RNIL)
32442   {
32443     jam();
32444     /**
32445      * Remove ZNEXT pointer from prevPagePtr
32446      *   so we don't try to "serve" multi-page request
32447      *   if next-page has been evicted
32448      */
32449     ptrCheckGuard(prevPagePtr, clogPageFileSize, logPageRecord);
32450     ndbrequire(prevPagePtr.p->logPageWord[ZNEXT_PAGE] == logPagePtr.i);
32451     prevPagePtr.p->logPageWord[ZNEXT_PAGE] = RNIL;
32452   }
32453 
32454   if (nextPagePtr.i != RNIL)
32455   {
32456     jam();
32457     /**
32458      * Remove ZPREV pointer from nextPagePtr
32459      *   so don't try to do above if prev has been evicted
32460      */
32461     ptrCheckGuard(nextPagePtr, clogPageFileSize, logPageRecord);
32462     ndbrequire(nextPagePtr.p->logPageWord[ZPREV_PAGE] == logPagePtr.i);
32463     nextPagePtr.p->logPageWord[ZPREV_PAGE] = RNIL;
32464   }
32465 
32466   releaseLogpage(0);
32467   logPagePtr = save;
32468 }
32469 
32470 void
evict(RedoPageCache & cache,Uint32 cnt)32471 Dblqh::evict(RedoPageCache& cache, Uint32 cnt)
32472 {
32473   while (cnoOfLogPages < (cnt + MIN_REDO_PAGES_FREE) && !cache.m_lru.isEmpty())
32474   {
32475     jam();
32476     Ptr<RedoCacheLogPageRecord> pagePtr;
32477     cache.m_lru.last(pagePtr);
32478     do_evict(cache, pagePtr);
32479   }
32480 }
32481 
32482 void
addCachePages(RedoPageCache & cache,Uint32 partNo,Uint32 startPageNo,LogFileOperationRecord * lfoPtrP)32483 Dblqh::addCachePages(RedoPageCache& cache,
32484                      Uint32 partNo,
32485                      Uint32 startPageNo,
32486                      LogFileOperationRecord* lfoPtrP)
32487 {
32488   Uint32 cnt = lfoPtrP->noPagesRw;
32489   Ptr<LogFileRecord> filePtr;
32490   filePtr.i = lfoPtrP->logFileRec;
32491   ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord);
32492 
32493   evict(cache, 0);
32494 
32495   if (cnoOfLogPages < cnt + MIN_REDO_PAGES_FREE)
32496   {
32497     /**
32498      * Don't cache if low on redo-buffer
32499      */
32500     return;
32501   }
32502 
32503   for (Uint32 i = 0; i<cnt ; i++)
32504   {
32505     Ptr<RedoCacheLogPageRecord> pagePtr;
32506     pagePtr.i = lfoPtrP->logPageArray[i];
32507     cache.m_pool.getPtr(pagePtr);
32508     pagePtr.p->m_part_no = partNo;
32509     pagePtr.p->m_page_no = startPageNo + i;
32510     pagePtr.p->m_file_no = filePtr.p->fileNo;
32511 
32512     bool found = false;
32513     {
32514       RedoCacheLogPageRecord key;
32515       key.m_part_no = partNo;
32516       key.m_page_no = startPageNo + i;
32517       key.m_file_no = filePtr.p->fileNo;
32518       Ptr<RedoCacheLogPageRecord> tmp;
32519       if (cache.m_hash.find(tmp, key))
32520       {
32521         jam();
32522         found = true;
32523         do_evict(cache, tmp);
32524       }
32525     }
32526 
32527     cache.m_hash.add(pagePtr);
32528     cache.m_lru.addFirst(pagePtr);
32529     if (0)
32530     ndbout_c("adding(%u) part: %u file: %u page: %u cnoOfLogPages: %u cnt: %u",
32531              found,
32532              pagePtr.p->m_part_no,
32533              pagePtr.p->m_file_no,
32534              pagePtr.p->m_page_no,
32535              cnoOfLogPages,
32536              cnt);
32537   }
32538 
32539   /**
32540    * Make sure pages are not released when prepare-record is executed
32541    * @see releaseLfoPages
32542    */
32543   lfoPtrP->firstLfoPage = RNIL;
32544 }
32545 
32546 void
release(RedoPageCache & cache)32547 Dblqh::release(RedoPageCache& cache)
32548 {
32549   while (!cache.m_lru.isEmpty())
32550   {
32551     jam();
32552     Ptr<RedoCacheLogPageRecord> pagePtr;
32553     cache.m_lru.last(pagePtr);
32554     cache.m_lru.remove(pagePtr);
32555 
32556     logPagePtr.i = pagePtr.i;
32557     ptrCheckGuard(logPagePtr, clogPageFileSize, logPageRecord);
32558     releaseLogpage(0);
32559   }
32560   cache.m_hash.removeAll();
32561 
32562 #if defined VM_TRACE || defined ERROR_INSERT || 1
32563   ndbout_c("RedoPageCache: avoided %u (%u/%u) page-reads",
32564            cache.m_hits, cache.m_multi_page, cache.m_multi_miss);
32565 #endif
32566   cache.m_hits = 0;
32567   cache.m_multi_page = 0;
32568   cache.m_multi_miss = 0;
32569 }
32570 #endif
32571 
32572 #ifndef NO_REDO_OPEN_FILE_CACHE
32573 
32574 #define MAX_CACHED_OPEN_FILES 4
32575 
32576 void
openFileRw_cache(Signal * signal,LogFileRecordPtr filePtr)32577 Dblqh::openFileRw_cache(Signal* signal,
32578                         LogFileRecordPtr filePtr)
32579 {
32580   jam();
32581 
32582   LogFileRecord::LogFileStatus state = filePtr.p->logFileStatus;
32583   if (state != LogFileRecord::CLOSED)
32584   {
32585     jam();
32586 
32587     m_redo_open_file_cache.m_hits++;
32588 
32589     if (m_redo_open_file_cache.m_lru.hasPrev(filePtr))
32590     {
32591       jam();
32592       m_redo_open_file_cache.m_lru.remove(filePtr);
32593       m_redo_open_file_cache.m_lru.addFirst(filePtr);
32594     }
32595 
32596     filePtr.p->logFileStatus = LogFileRecord::OPEN_EXEC_LOG_CACHED;
32597 
32598     signal->theData[0] = filePtr.i;
32599     signal->theData[1] = filePtr.p->fileRef;
32600     sendSignal(reference(), GSN_FSOPENCONF, signal, 2, JBB);
32601     return;
32602   }
32603 
32604   filePtr.p->logFileStatus = LogFileRecord::OPEN_EXEC_LOG;
32605   openFileRw(signal, filePtr, false); /* No write buffering */
32606 }
32607 
32608 void
closeFile_cache(Signal * signal,LogFileRecordPtr filePtr,Uint32 line)32609 Dblqh::closeFile_cache(Signal* signal,
32610                        LogFileRecordPtr filePtr,
32611                        Uint32 line)
32612 {
32613   jam();
32614 
32615   filePtr.p->logFileStatus = LogFileRecord::CLOSING_EXEC_LOG_CACHED;
32616   if (m_redo_open_file_cache.m_lru.getCount() >= MAX_CACHED_OPEN_FILES)
32617   {
32618     jam();
32619     Ptr<LogFileRecord> evictPtr;
32620     Uint32 logPartRec = filePtr.p->logPartRec;
32621     /**
32622      * Only evict file with same log-part, other redo-execution will continue
32623      *   for the log-part once file is closed
32624      *
32625      * Note: 1) loop is guaranteed to terminate as filePtr must be in list
32626      *       2) loop is ok as MAX_CACHED_OPEN_FILES is "small"
32627      *          (if it was big, the m_lru should be split per log-part)
32628      */
32629     m_redo_open_file_cache.m_lru.last(evictPtr);
32630     while (evictPtr.p->logPartRec != logPartRec)
32631     {
32632       jam();
32633       ndbrequire(m_redo_open_file_cache.m_lru.prev(evictPtr));
32634     }
32635     m_redo_open_file_cache.m_lru.remove(evictPtr);
32636     evictPtr.p->logFileStatus = LogFileRecord::CLOSING_EXEC_LOG;
32637     closeFile(signal, evictPtr, line);
32638   }
32639   else
32640   {
32641     jam();
32642     signal->theData[0] = ZEXEC_SR;
32643     signal->theData[1] = filePtr.p->logPartRec;
32644     sendSignal(cownref, GSN_CONTINUEB, signal, 2, JBB);
32645   }
32646 }
32647 
32648 void
release(Signal * signal,RedoOpenFileCache & cache)32649 Dblqh::release(Signal* signal, RedoOpenFileCache & cache)
32650 {
32651   Ptr<LogFileRecord> closePtr;
32652 
32653   while (m_redo_open_file_cache.m_lru.first(closePtr))
32654   {
32655     jam();
32656     m_redo_open_file_cache.m_lru.remove(closePtr);
32657     if (closePtr.p->logFileStatus == LogFileRecord::CLOSING_EXEC_LOG_CACHED)
32658     {
32659       jam();
32660       closePtr.p->logFileStatus = LogFileRecord::CLOSING_EXEC_LOG_CACHED;
32661       m_redo_open_file_cache.m_close_cnt ++;
32662       signal->theData[0] = closePtr.p->fileRef;
32663       signal->theData[1] = reference();
32664       signal->theData[2] = closePtr.i;
32665       signal->theData[3] = ZCLOSE_NO_DELETE;
32666       signal->theData[4] = __LINE__;
32667       sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, 5, JBA);
32668       return;
32669     }
32670     else
32671     {
32672       ndbout_c("Found file with state: %u",
32673                closePtr.p->logFileStatus);
32674     }
32675   }
32676 
32677   ndbout_c("RedoOpenFileCache: Avoided %u file-open/close closed: %u",
32678            m_redo_open_file_cache.m_hits,
32679            m_redo_open_file_cache.m_close_cnt);
32680   m_redo_open_file_cache.m_hits = 0;
32681   m_redo_open_file_cache.m_close_cnt = 0;
32682   execLogComp_extra_files_closed(signal);
32683 }
32684 
32685 #endif
32686 
32687 bool
check_ndb_versions() const32688 Dblqh::check_ndb_versions() const
32689 {
32690   Uint32 version = getNodeInfo(getOwnNodeId()).m_version;
32691   for (Uint32 i = 0; i < cnoOfNodes; i++)
32692   {
32693     Uint32 node = cnodeData[i];
32694     if (cnodeStatus[i] == ZNODE_UP)
32695     {
32696       if(getNodeInfo(node).m_version != version)
32697       {
32698         return false;
32699       }
32700     }
32701   }
32702   return true;
32703 }
32704 
32705 void
handle_check_system_scans(Signal * signal)32706 Dblqh::handle_check_system_scans(Signal *signal)
32707 {
32708   ScanRecordPtr loc_scanptr;
32709   TcConnectionrecPtr loc_tcConnectptr;
32710   for (Uint32 i = 0; i <= ZCOPY_FRAGREQ_CHECK_INDEX; i++)
32711   {
32712     jam();
32713     if (c_check_scanptr_i[i] != RNIL)
32714     {
32715       loc_scanptr.i = c_check_scanptr_i[i];
32716       ndbrequire(c_scanRecordPool.getValidPtr(loc_scanptr));
32717       loc_tcConnectptr.i = loc_scanptr.p->scanTcrec;
32718       ndbrequire(tcConnect_pool.getValidPtr(loc_tcConnectptr));
32719       if (loc_scanptr.p->scan_lastSeen == c_check_scanptr_save_line[i] &&
32720           loc_tcConnectptr.p->tcTimer != 0 &&
32721           loc_tcConnectptr.p->tcTimer == c_check_scanptr_save_timer[i])
32722       {
32723         /**
32724          * We are at the same line, the time is still the same as the last
32725          * 10 second.
32726          * Report where we are and what type of scan that has stalled.
32727          */
32728         jam();
32729         Uint32 time_stalled =
32730           cLqhTimeOutCount - c_check_scanptr_save_timer[i];
32731         time_stalled *= 10;
32732         if (time_stalled < 10000)
32733         {
32734           jam();
32735           /* Ignore delays smaller than 10 seconds */
32736           continue;
32737         }
32738         if (i == ZLCP_CHECK_INDEX)
32739         {
32740           jam();
32741           g_eventLogger->info("LCP Scan have stalled for %u milliseconds, last"
32742                               "last seen on line %u, check_lcp_stop_count: %u",
32743                               time_stalled,
32744                               c_check_scanptr_save_line[i],
32745                               loc_scanptr.p->scan_check_lcp_stop);
32746         }
32747         else if (i == ZBACKUP_CHECK_INDEX)
32748         {
32749           jam();
32750           g_eventLogger->info("Backup Scan have stalled for %u milliseconds, "
32751                               "last seen on line %u, check_lcp_stop_count: %u",
32752                               time_stalled,
32753                               c_check_scanptr_save_line[i],
32754                               loc_scanptr.p->scan_check_lcp_stop);
32755         }
32756         else if (i == ZCOPY_FRAGREQ_CHECK_INDEX)
32757         {
32758           jam();
32759           g_eventLogger->info("COPY_FRAGREQ Scan have stalled for %u "
32760                               "milliseconds, "
32761                               "last seen on line %u, check_lcp_stop_count: %u",
32762                               time_stalled,
32763                               c_check_scanptr_save_line[i],
32764                               loc_scanptr.p->scan_check_lcp_stop);
32765           signal->theData[0] = DumpStateOrd::AccDumpOneScanRec;
32766           signal->theData[1] = loc_scanptr.p->scanAccPtr;
32767           EXECUTE_DIRECT(DBACC, GSN_DUMP_STATE_ORD, signal, 2);
32768           signal->theData[0] = DumpStateOrd::LqhDumpOneCopyTcRec;
32769           signal->theData[1] = loc_tcConnectptr.i;
32770           EXECUTE_DIRECT(DBLQH, GSN_DUMP_STATE_ORD, signal, 2);
32771         }
32772         signal->theData[0] = DumpStateOrd::LqhDumpOneScanRec;
32773         signal->theData[1] = loc_scanptr.i;
32774         EXECUTE_DIRECT(DBLQH, GSN_DUMP_STATE_ORD, signal, 2);
32775         if (time_stalled >= 120000 &&
32776             (i != ZCOPY_FRAGREQ_CHECK_INDEX))
32777         {
32778           /**
32779            * LCP and Backup scans proceed even in the presence of locks,
32780            * COPY_FRAGREQ scans can be held up on locks without any
32781            * real limits since transactions have no specified maximum
32782            * time.
32783            */
32784           jam();
32785           abort();
32786         }
32787       }
32788       else
32789       {
32790         jam();
32791         c_check_scanptr_save_line[i] = loc_scanptr.p->scan_lastSeen;
32792         c_check_scanptr_save_timer[i] = loc_tcConnectptr.p->tcTimer;
32793       }
32794     }
32795     else
32796     {
32797       c_check_scanptr_save_timer[i] = cLqhTimeOutCount;
32798     }
32799   }
32800 }
32801 
32802 void
suspendFile(Signal * signal,Uint32 filePtrI,Uint32 millis)32803 Dblqh::suspendFile(Signal* signal, Uint32 filePtrI, Uint32 millis)
32804 {
32805   Ptr<LogFileRecord> tmp;
32806   tmp.i = filePtrI;
32807   ptrCheckGuard(tmp, clogFileFileSize, logFileRecord);
32808   suspendFile(signal, tmp, millis);
32809 }
32810 
32811 void
suspendFile(Signal * signal,Ptr<LogFileRecord> logFilePtr,Uint32 millis)32812 Dblqh::suspendFile(Signal* signal, Ptr<LogFileRecord> logFilePtr, Uint32 millis)
32813 {
32814   SaveSignal<FsSuspendOrd::SignalLength> tmp(signal);
32815   signal->theData[0] = logFilePtr.p->fileRef;
32816   signal->theData[1] = millis;
32817   sendSignal(NDBFS_REF, GSN_FSSUSPENDORD, signal, 2, JBA);
32818 }
32819 
32820 void
send_runredo_event(Signal * signal,LogPartRecord * lp,Uint32 gci)32821 Dblqh::send_runredo_event(Signal* signal, LogPartRecord * lp, Uint32 gci)
32822 {
32823   signal->theData[0] = NDB_LE_RunRedo;
32824   signal->theData[1] = lp->logPartNo;
32825   signal->theData[2] = csrPhasesCompleted;
32826   signal->theData[3] = lp->logStartGci;
32827   signal->theData[4] = gci;
32828   signal->theData[5] = lp->logLastGci;
32829 
32830 
32831   LogFileRecordPtr filePtr;
32832   filePtr.i = lp->startLogfile;
32833   ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord);
32834   signal->theData[6] = filePtr.p->fileNo;
32835   signal->theData[7] = lp->startMbyte;
32836 
32837   filePtr.i = lp->currentLogfile;
32838   ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord);
32839   signal->theData[8] = filePtr.p->fileNo;
32840   signal->theData[9] = filePtr.p->currentMbyte;
32841 
32842   filePtr.i = lp->stopLogfile;
32843   ptrCheckGuard(filePtr, clogFileFileSize, logFileRecord);
32844   signal->theData[10] = filePtr.p->fileNo;
32845   signal->theData[11] = lp->stopMbyte;
32846   sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 12, JBB);
32847 }
32848 
32849 void
init(Uint32 partNo)32850 Dblqh::IOTracker::init(Uint32 partNo)
32851 {
32852   bzero(this, sizeof(* this));
32853   m_log_part_no = partNo;
32854 }
32855 
32856 int
tick(Uint32 now,Uint32 maxlag,Uint32 maxlag_cnt)32857 Dblqh::IOTracker::tick(Uint32 now, Uint32 maxlag, Uint32 maxlag_cnt)
32858 {
32859   Uint32 t = m_current_time;
32860 
32861   if ((t / SAMPLE_TIME) == (now / SAMPLE_TIME))
32862     return 0;
32863 
32864   m_redo_written_bytes += m_sample_completed_bytes;
32865 
32866   m_current_time = now;
32867   if (m_sample_completed_bytes >= m_sample_sent_bytes)
32868   {
32869     /**
32870      * If we completed all io we sent during current sample...
32871      *   we can't have any problem...and
32872      *   we can't measure io throughput, so don't add measurement
32873      *
32874      */
32875     m_sample_sent_bytes = 0;
32876     m_sample_completed_bytes = 0;
32877   }
32878   else
32879   {
32880     // io maxed out...
32881     Uint32 elapsed = now - t;
32882     m_save_written_bytes[m_save_pos] += m_sample_completed_bytes;
32883     m_save_elapsed_millis[m_save_pos] += elapsed;
32884 
32885     m_curr_written_bytes += m_sample_completed_bytes;
32886     m_curr_elapsed_millis += elapsed;
32887 
32888     Uint32 bps = (1000 * m_sample_completed_bytes) / (elapsed)?elapsed:1;
32889     Uint32 lag = bps ? m_sum_outstanding_bytes / bps : 30;
32890     if (false && lag >= 30)
32891     {
32892       g_eventLogger->info("part: %u tick(%u) m_sample_completed_bytes: %ukb "
32893                           "m_sample_sent_bytes: %ukb elapsed: %u kbps: %u lag:"
32894                           " %u",
32895                           m_log_part_no,
32896                           now,
32897                           Uint32(m_sample_completed_bytes / 1024),
32898                           Uint32(m_sample_sent_bytes/1024),
32899                           elapsed, bps/1000, lag);
32900     }
32901 
32902     m_sample_sent_bytes -= m_sample_completed_bytes;
32903     m_sample_completed_bytes = 0;
32904   }
32905 
32906   int retVal = 0;
32907   Uint32 save_lag_cnt = m_lag_cnt;
32908   if ((now / SLIDING_WINDOW_LEN) != (t / SLIDING_WINDOW_LEN))
32909   {
32910     Uint32 lag = m_curr_written_bytes ?
32911       ((Uint64(m_sum_outstanding_bytes) / 1000) *
32912         Uint64(m_curr_elapsed_millis)) / m_curr_written_bytes :
32913       0;
32914 
32915     m_lag_in_seconds = lag;
32916 
32917     if (lag > maxlag)
32918     {
32919       /**
32920        * We did have lag last second...
32921        *   increase m_lag_cnt and check if it has reached maxlag_cnt
32922        */
32923       Uint32 tmp = m_lag_cnt;
32924       m_lag_cnt += (lag / (maxlag)?maxlag:1);
32925       if (tmp < maxlag_cnt && m_lag_cnt >= maxlag_cnt)
32926       {
32927         retVal = -1; // start aborting transactions
32928       }
32929     }
32930     else
32931     {
32932       /**
32933        * We did not have lag...reset m_lag_cnt
32934        */
32935       if (m_lag_cnt >= maxlag_cnt)
32936       {
32937         // stop aborting transactions
32938         retVal = 1;
32939       }
32940       m_lag_cnt = 0;
32941     }
32942 
32943 #if 1
32944     if (m_lag_cnt == 0 && lag == 0)
32945     {
32946     }
32947     else if (lag > 0 && m_lag_cnt == 0)
32948     {
32949       g_eventLogger->info("part: %u : time to complete: %u",
32950                           m_log_part_no, lag);
32951     }
32952     else if (m_lag_cnt < maxlag_cnt && m_lag_cnt == save_lag_cnt)
32953     {
32954       g_eventLogger->info("part: %u : time to complete: %u lag_cnt:"
32955                           " %u => %u => retVal: %d",
32956                           m_log_part_no,
32957                           lag,
32958                           save_lag_cnt,
32959                           m_lag_cnt,
32960                           retVal);
32961     }
32962     else
32963     {
32964       g_eventLogger->info("part: %u : sum_outstanding: %ukb avg_written:"
32965                           " %ukb avg_elapsed: %ums time to complete:"
32966                           " %u lag_cnt: %u => %u retVal: %d",
32967                           m_log_part_no,
32968                           Uint32(m_sum_outstanding_bytes / 1024),
32969                           Uint32(m_curr_written_bytes/1024),
32970                           m_curr_elapsed_millis,
32971                           lag,
32972                           save_lag_cnt,
32973                           m_lag_cnt,
32974                           retVal);
32975     }
32976 #endif
32977 
32978     /**
32979      * And finally rotate sliding window
32980      */
32981     Uint32 last = (m_save_pos + 1) % SLIDING_WINDOW_HISTORY_LEN;
32982     assert(m_curr_written_bytes >= m_save_written_bytes[last]);
32983     assert(m_curr_elapsed_millis >= m_save_elapsed_millis[last]);
32984     m_curr_written_bytes -= m_save_written_bytes[last];
32985     m_curr_elapsed_millis -= m_save_elapsed_millis[last];
32986     m_save_written_bytes[last] = 0;
32987     m_save_elapsed_millis[last] = 0;
32988     m_save_pos = last;
32989   }
32990   return retVal;
32991 }
32992 
32993 void
send_io(Uint32 bytes)32994 Dblqh::IOTracker::send_io(Uint32 bytes)
32995 {
32996   m_sum_outstanding_bytes += bytes;
32997   m_sample_sent_bytes += bytes;
32998 }
32999 
33000 void
complete_io(Uint32 bytes)33001 Dblqh::IOTracker::complete_io(Uint32 bytes)
33002 {
33003   assert(m_sum_outstanding_bytes >= bytes);
33004 
33005   m_sum_outstanding_bytes -= bytes;
33006   m_sample_completed_bytes += bytes;
33007 }
33008 
33009 void
mark_end_of_lcp_restore(Signal * signal)33010 Dblqh::mark_end_of_lcp_restore(Signal* signal)
33011 {
33012   jam();
33013 
33014   /* Get Restore to summarise the work and rates */
33015   /* Done directly to keep expected ordering between log messages +
33016    * reduce cognitive burden on readers
33017    */
33018   signal->theData[0] = DumpStateOrd::RestoreRates;
33019   EXECUTE_DIRECT(RESTORE, GSN_DUMP_STATE_ORD, signal, 1);
33020   jamEntry();
33021 
33022   /* Todo : Summarise non-trans copy stats if relevant */
33023 
33024   /* Summarise our work */
33025   g_eventLogger->info("LDM(%u): Completed fuzzy restore %u fragments (%u from LCP,"
33026                       " %u by non-trans copy)",
33027                       instance(),
33028                       c_fragmentsStarted,
33029                       c_fragmentsStarted - c_fragmentsStartedWithCopy,
33030                       c_fragmentsStartedWithCopy);
33031   g_eventLogger->info("LDM(%u): Starting DD Undo log application",
33032                       instance());
33033 
33034   sendLOCAL_RECOVERY_COMPLETE_REP(signal,
33035                  LocalRecoveryCompleteRep::RESTORE_FRAG_COMPLETED);
33036 }
33037 
33038 void
log_fragment_copied(Signal * signal)33039 Dblqh::log_fragment_copied(Signal* signal)
33040 {
33041   jam();
33042 
33043   /* Get fragment's stats from TUP */
33044   const Dbtup::FragStats fs
33045       = c_tup->get_frag_stats(fragptr.p->tupFragptr);
33046 
33047   const Uint64 fragRows = fs.committedRowCount;
33048 
33049   Uint64 percentChanged = (fragRows ?
33050         ((c_fragCopyRowsIns + c_fragCopyRowsDel) * 100) / fragRows
33051                            : 0);
33052 
33053   /* Have already copied a fragment...report on it now */
33054   g_eventLogger->info("LDM(%u): Completed copy of fragment T%uF%u. "
33055                       "Changed +%llu/-%llu rows, %llu bytes. "
33056                       "%llu pct churn to %llu rows.",
33057                       instance(),
33058                       c_fragCopyTable,
33059                       c_fragCopyFrag,
33060                       c_fragCopyRowsIns,
33061                       c_fragCopyRowsDel,
33062                       c_fragBytesCopied,
33063                       percentChanged,
33064                       fragRows);
33065 
33066   c_totalCopyRowsIns+= c_fragCopyRowsIns;
33067   c_totalCopyRowsDel+= c_fragCopyRowsDel;
33068   c_totalBytesCopied+= c_fragBytesCopied;
33069   c_fragCopyTable = RNIL;
33070   c_fragCopyFrag = RNIL;
33071   c_fragCopyRowsIns = 0;
33072   c_fragCopyRowsDel = 0;
33073   c_fragBytesCopied = 0;
33074 }
33075 
33076 /**
33077  * We assist the Backup block in reporting disk write speeds by sending the
33078  * number of bytes written and completed since last time we were asked.
33079  */
33080 Uint64
report_redo_written_bytes()33081 Dblqh::report_redo_written_bytes()
33082 {
33083   Uint64 redo_written_bytes = 0;
33084   for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++)
33085   {
33086     ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
33087     redo_written_bytes +=
33088       logPartPtr.p->m_io_tracker.get_and_reset_redo_written_bytes();
33089   }
33090   return redo_written_bytes;
33091 }
33092 
33093 /**
33094  * We define REDO log IO lagging as when at least 2 seconds of log data has been
33095  * sent to the file system and not yet been completed.
33096  */
33097 bool
is_ldm_instance_io_lagging()33098 Dblqh::is_ldm_instance_io_lagging()
33099 {
33100   bool io_lag_now = false;
33101   do
33102   {
33103     for (logPartPtr.i = 0; logPartPtr.i < clogPartFileSize; logPartPtr.i++)
33104     {
33105       ptrCheckGuard(logPartPtr, clogPartFileSize, logPartRecord);
33106       if (logPartPtr.p->m_io_tracker.get_lag_in_seconds() >= 2)
33107       {
33108         jam();
33109         io_lag_now = true;
33110         break;
33111       }
33112       jam();
33113     }
33114   } while (0);
33115 
33116   Int32 change = 0;
33117   if (c_is_io_lag_reported)
33118   {
33119     jam();
33120     if (!io_lag_now)
33121     {
33122       jam();
33123       change = Int32(-1);
33124     }
33125   }
33126   else
33127   {
33128     jam();
33129     if (io_lag_now)
33130     {
33131       jam();
33132       change = Int32(+1);
33133     }
33134   }
33135   c_is_io_lag_reported = io_lag_now;
33136   return change_and_get_io_laggers(change) == 0 ? false : true;
33137 }
33138 
33139 
get_committed_mbytes(LogPartRecord * logPartPtrP)33140 Uint32 Dblqh::get_committed_mbytes(LogPartRecord* logPartPtrP)
33141 {
33142   Uint64 committed_words = logPartPtrP->m_committed_words;
33143   Uint64 committed_mbytes = committed_words >> 18;
33144   committed_mbytes += 1;
33145   return (Uint32)committed_mbytes;
33146 }
33147 
increment_committed_mbytes(LogPartRecord * logPartPtrP,TcConnectionrec * regTcPtr)33148 void Dblqh::increment_committed_mbytes(LogPartRecord* logPartPtrP,
33149                                        TcConnectionrec* regTcPtr)
33150 {
33151   logPartPtrP->m_committed_words += (ZCOMMIT_LOG_SIZE + 1);
33152   regTcPtr->m_committed_log_space = 1;
33153 }
33154 
decrement_committed_mbytes(LogPartRecord * logPartPtrP,TcConnectionrec * regTcPtr)33155 void Dblqh::decrement_committed_mbytes(LogPartRecord* logPartPtrP,
33156                                        TcConnectionrec* regTcPtr)
33157 {
33158   ndbassert(logPartPtrP->m_committed_words >= (ZCOMMIT_LOG_SIZE + 1));
33159   logPartPtrP->m_committed_words -= (ZCOMMIT_LOG_SIZE + 1);
33160   ndbassert(regTcPtr->m_committed_log_space == 1);
33161   regTcPtr->m_committed_log_space = 0;
33162 }
33163 
execLOCAL_LATEST_LCP_ID_REP(Signal * signal)33164 void Dblqh::execLOCAL_LATEST_LCP_ID_REP(Signal *signal)
33165 {
33166   jamEntry();
33167   m_restart_local_latest_lcp_id = signal->theData[0];
33168 }
33169 
/**
 * Request shrinking of one transient pool.
 *
 * The pool's bit in c_transient_pools_shrinking is set. Only when the
 * bit was not already set, a CONTINUEB(ZLQH_SHRINK_TRANSIENT_POOLS)
 * carrying the pool index is sent to this block itself.
 *
 * @param pool_index  Index of the pool to shrink.
 */
void
Dblqh::sendPoolShrink(const Uint32 pool_index)
{
  /* Remember whether the bit was clear before setting it */
  const bool need_send = c_transient_pools_shrinking.get(pool_index) == 0;
  c_transient_pools_shrinking.set(pool_index);
  if (need_send)
  {
    /*
     * No Signal object is passed in, so a local two-word signal buffer
     * is used: a Signal is constructed in its storage and the buffer is
     * zeroed before the two data words are filled in.
     */
    SignalT<2> signal2[1];
    Signal* signal = new (&signal2[0]) Signal(0);
    memset(signal2, 0, sizeof(signal2));
    signal->theData[0] = ZLQH_SHRINK_TRANSIENT_POOLS;
    signal->theData[1] = pool_index;
    sendSignal(reference(), GSN_CONTINUEB, (Signal*)signal, 2, JBB);
  }
}
33185 
33186 void
shrinkTransientPools(Uint32 pool_index)33187 Dblqh::shrinkTransientPools(Uint32 pool_index)
33188 {
33189   ndbrequire(pool_index < c_transient_pool_count);
33190   ndbrequire(c_transient_pools_shrinking.get(pool_index));
33191   if (c_transient_pools[pool_index]->rearrange_free_list_and_shrink(1))
33192   {
33193     sendPoolShrink(pool_index);
33194   }
33195   else
33196   {
33197     c_transient_pools_shrinking.clear(pool_index);
33198   }
33199 }
33200