1 /*
2    Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #ifndef DBDIH_H
26 #define DBDIH_H
27 
28 #include <ndb_limits.h>
29 #include <pc.hpp>
30 #include <SimulatedBlock.hpp>
31 #include "Sysfile.hpp"
32 #include <SignalCounter.hpp>
33 
34 #include <signaldata/MasterLCP.hpp>
35 #include <signaldata/CopyGCIReq.hpp>
36 #include <blocks/mutexes.hpp>
37 #include <signaldata/LCP.hpp>
38 #include <NdbSeqLock.hpp>
39 
#ifdef DBDIH_C

/*
 * Block-private constants.  They are only defined when the including
 * translation unit has defined DBDIH_C before including this header.
 */

/*###################*/
/* FILE SYSTEM FLAGS */
/*###################*/
#define ZLIST_OF_PAIRS                 0
#define ZLIST_OF_PAIRS_SYNCH           16
#define ZOPEN_READ_WRITE               2
#define ZCREATE_READ_WRITE             0x302
#define ZCLOSE_NO_DELETE               0
#define ZCLOSE_DELETE                  1

/*#############*/
/* NODE STATES */
/*#############*/
#define ZIDLE                          0
#define ZACTIVE                        1

/*#########*/
/* GENERAL */
/*#########*/
#define ZVAR_NO_WORD                   1
#define ZVAR_NO_CRESTART_INFO          20
#define ZVAR_NO_CRESTART_INFO_TO_FILE  21
#define ZVALID                         1
#define ZINVALID                       2

/*#############*/
/* ERROR CODES */
/*#############*/

/* Error codes for transactions (none so far) */
#define ZUNDEFINED_FRAGMENT_ERROR      311

/* Error codes for add table */
#define ZREPLERROR1                    306
#define ZREPLERROR2                    307

/* Crash codes */
#define ZCOULD_NOT_OCCUR_ERROR         300
#define ZNOT_MASTER_ERROR              301
#define ZWRONG_FAILURE_NUMBER_ERROR    302
#define ZWRONG_START_NODE_ERROR        303
#define ZNO_REPLICA_FOUND_ERROR        304

/* Codes from LQH */
#define ZNODE_FAILURE_ERROR            400


/*########*/
/* PHASES */
/*########*/
#define ZNDB_SPH1                      1
#define ZNDB_SPH2                      2
#define ZNDB_SPH3                      3
#define ZNDB_SPH4                      4
#define ZNDB_SPH5                      5
#define ZNDB_SPH6                      6
#define ZNDB_SPH7                      7
#define ZNDB_SPH8                      8

/*#######*/
/* SIZES */
/*#######*/
#define ZPAGEREC                       100
#define ZCREATE_REPLICA_FILE_SIZE      4
#define ZPROXY_MASTER_FILE_SIZE        10
#define ZPROXY_FILE_SIZE               10
#endif
115 
116 class Dbdih: public SimulatedBlock {
117 #ifdef ERROR_INSERT
118   typedef void (Dbdih::* SendFunction)(Signal*, Uint32, Uint32);
119 #endif
120 public:
121 
122   // Records
123 
124   /*������������������������������������������������������������������������
125    * THE API CONNECT RECORD IS THE SAME RECORD POINTER AS USED IN THE TC BLOCK
126    *
127    *  IT KEEPS TRACK OF ALL THE OPERATIONS CONNECTED TO THIS TRANSACTION.
128    *  IT IS LINKED INTO A QUEUE IN CASE THE GLOBAL CHECKPOINT IS CURRENTLY
129    * ONGOING */
130   struct ApiConnectRecord {
131     Uint64 apiGci;
132     Uint32 senderData;
133   };
134   typedef Ptr<ApiConnectRecord> ApiConnectRecordPtr;
135 
136   /*############## CONNECT_RECORD ##############*/
137   /*������������������������������������������������������������������������*/
138   /* THE CONNECT RECORD IS CREATED WHEN A TRANSACTION HAS TO START. IT KEEPS
139      ALL INTERMEDIATE INFORMATION NECESSARY FOR THE TRANSACTION FROM THE
140      DISTRIBUTED MANAGER. THE RECORD KEEPS INFORMATION ABOUT THE
141      OPERATIONS THAT HAVE TO BE CARRIED OUT BY THE TRANSACTION AND
142      ALSO THE TRAIL OF NODES FOR EACH OPERATION IN THE THE
143      TRANSACTION.
144   */
145   struct ConnectRecord {
146     enum ConnectState {
147       INUSE = 0,
148       FREE = 1,
149       STARTED = 2,
150       ALTER_TABLE = 3,
151       ALTER_TABLE_ABORT = 4, // "local" abort
152       ALTER_TABLE_REVERT = 5,
153       GET_TABINFO = 6
154     };
155     union {
156       Uint32 nodes[MAX_REPLICAS];
157       struct {
158         Uint32 m_changeMask;
159         Uint32 m_totalfragments;
160         Uint32 m_org_totalfragments;
161         Uint32 m_new_map_ptr_i;
162       } m_alter;
163       struct {
164         Uint32 m_map_ptr_i;
165       } m_create;
166       struct {
167         Uint32 m_requestInfo;
168       } m_get_tabinfo;
169     };
170     ConnectState connectState;
171     Uint32 nextPool;
172     Uint32 table;
173     Uint32 userpointer;
174     BlockReference userblockref;
175     Callback m_callback;
176   };
177   typedef Ptr<ConnectRecord> ConnectRecordPtr;
178 
179   /*������������������������������������������������������������������������*/
180   /*       THESE RECORDS ARE USED WHEN CREATING REPLICAS DURING SYSTEM      */
181   /*       RESTART. I NEED A COMPLEX DATA STRUCTURE DESCRIBING THE REPLICAS */
182   /*       I WILL TRY TO CREATE FOR EACH FRAGMENT.                          */
183   /*                                                                        */
184   /*       I STORE A REFERENCE TO THE FOUR POSSIBLE CREATE REPLICA RECORDS  */
185   /*       IN A COMMON STORED VARIABLE. I ALLOW A MAXIMUM OF 4 REPLICAS TO  */
186   /*       BE RESTARTED PER FRAGMENT.                                       */
187   /*������������������������������������������������������������������������*/
188   struct CreateReplicaRecord {
189     Uint32 logStartGci[MAX_LOG_EXEC];
190     Uint32 logStopGci[MAX_LOG_EXEC];
191     Uint16 logNodeId[MAX_LOG_EXEC];
192     Uint32 createLcpId;
193 
194     Uint32 replicaRec;
195     Uint16 dataNodeId;
196     Uint16 lcpNo;
197     Uint16 noLogNodes;
198   };
199   typedef Ptr<CreateReplicaRecord> CreateReplicaRecordPtr;
200 
201   /*������������������������������������������������������������������������*/
202   /*       THIS RECORD CONTAINS A FILE DESCRIPTION. THERE ARE TWO           */
203   /*       FILES PER TABLE TO RAISE SECURITY LEVEL AGAINST DISK CRASHES.    */
204   /*������������������������������������������������������������������������*/
205   struct FileRecord {
206     enum FileStatus {
207       CLOSED = 0,
208       CRASHED = 1,
209       OPEN = 2
210     };
211     enum FileType {
212       TABLE_FILE = 0,
213       GCP_FILE = 1
214     };
215     enum ReqStatus {
216       IDLE = 0,
217       CREATING_GCP = 1,
218       OPENING_GCP = 2,
219       OPENING_COPY_GCI = 3,
220       WRITING_COPY_GCI = 4,
221       CREATING_COPY_GCI = 5,
222       OPENING_TABLE = 6,
223       READING_GCP = 7,
224       READING_TABLE = 8,
225       WRITE_INIT_GCP = 9,
226       TABLE_CREATE = 10,
227       TABLE_WRITE = 11,
228       TABLE_CLOSE = 12,
229       CLOSING_GCP = 13,
230       CLOSING_TABLE_CRASH = 14,
231       CLOSING_TABLE_SR = 15,
232       CLOSING_GCP_CRASH = 16,
233       TABLE_OPEN_FOR_DELETE = 17,
234       TABLE_CLOSE_DELETE = 18
235     };
236     Uint32 fileName[4];
237     Uint32 fileRef;
238     FileStatus fileStatus;
239     FileType fileType;
240     Uint32 nextFile;
241     ReqStatus reqStatus;
242     Uint32 tabRef;
243   };
244   typedef Ptr<FileRecord> FileRecordPtr;
245 
246   /*������������������������������������������������������������������������*/
247   /* THIS RECORD KEEPS THE STORAGE AND DECISIONS INFORMATION OF A FRAGMENT  */
248   /* AND ITS REPLICAS. IF FRAGMENT HAS MORE THAN ONE BACK UP                */
249   /* REPLICA THEN A LIST OF MORE NODES IS ATTACHED TO THIS RECORD.          */
250   /* EACH RECORD IN MORE LIST HAS INFORMATION ABOUT ONE BACKUP. THIS RECORD */
251   /* ALSO HAVE THE STATUS OF THE FRAGMENT.                                  */
252   /*������������������������������������������������������������������������*/
253   /*                                                                        */
254   /*       FRAGMENTSTORE RECORD ALIGNED TO BE 64 BYTES                      */
255   /*������������������������������������������������������������������������*/
256   struct Fragmentstore {
257     Uint16 activeNodes[MAX_REPLICAS];
258     Uint32 preferredPrimary;
259 
260     Uint32 oldStoredReplicas;    /* "DEAD" STORED REPLICAS */
261     Uint32 storedReplicas;       /* "ALIVE" STORED REPLICAS */
262     Uint32 nextFragmentChunk;
263 
264     Uint32 m_log_part_id;
265 
266     Uint8 distributionKey;
267     Uint8 fragReplicas;
268     Uint8 noOldStoredReplicas;  /* NUMBER OF "DEAD" STORED REPLICAS */
269     Uint8 noStoredReplicas;     /* NUMBER OF "ALIVE" STORED REPLICAS*/
270     Uint8 noLcpReplicas;        ///< No of replicas remaining to be LCP:ed
271   };
272   typedef Ptr<Fragmentstore> FragmentstorePtr;
273 
274   /*########### PAGE RECORD ############*/
275   /*��������������������������������������������������������������������*/
276   /*       THIS RECORD KEEPS INFORMATION ABOUT NODE GROUPS.             */
277   /*��������������������������������������������������������������������*/
278   struct NodeGroupRecord {
279     Uint32 nodesInGroup[MAX_REPLICAS + 1];
280     Uint32 nextReplicaNode;
281     Uint32 nodeCount;
282     Uint32 activeTakeOver; // Which node...
283     Uint32 m_next_log_part;
284     Uint32 nodegroupIndex;
285     Uint32 m_ref_count;
286   };
287   typedef Ptr<NodeGroupRecord> NodeGroupRecordPtr;
  /*--------------------------------------------------------------------*/
  /*       THIS RECORD KEEPS INFORMATION ABOUT NODES.                   */
  /*--------------------------------------------------------------------*/
  /*       RECORD ALIGNED TO BE 64 BYTES.                               */
  /*--------------------------------------------------------------------*/
293   enum NodefailHandlingStep {
294     NF_REMOVE_NODE_FROM_TABLE = 1,
295     NF_GCP_TAKE_OVER = 2,
296     NF_LCP_TAKE_OVER = 4
297   };
298 
299   struct NodeRecord {
300     NodeRecord();
301 
302     enum NodeStatus {
303       NOT_IN_CLUSTER = 0,
304       ALIVE = 1,
305       STARTING = 2,
306       DIED_NOW = 3,
307       DYING = 4,
308       DEAD = 5
309     };
310 
311     struct FragmentCheckpointInfo {
312       Uint32 tableId;
313       Uint32 fragId;
314       Uint32 replicaPtr;
315     };
316 
317     Sysfile::ActiveStatus activeStatus;
318 
319     NodeStatus nodeStatus;
320     bool useInTransactions;
321     bool allowNodeStart;
322     bool m_inclDihLcp;
323     Uint8 copyCompleted; // 0 = NO :-), 1 = YES, 2 = yes, first WAITING
324 
325     FragmentCheckpointInfo startedChkpt[2];
326     FragmentCheckpointInfo queuedChkpt[2];
327 
328     Bitmask<1> m_nodefailSteps;
329     Uint32 activeTabptr;
330     Uint32 nextNode;
331     Uint32 nodeGroup;
332 
333     SignalCounter m_NF_COMPLETE_REP;
334 
335     Uint8 dbtcFailCompleted;
336     Uint8 dblqhFailCompleted;
337     Uint8 dbdihFailCompleted;
338     Uint8 dbdictFailCompleted;
339     Uint8 recNODE_FAILREP;
340 
341     Uint8 noOfQueuedChkpt;
342     Uint8 noOfStartedChkpt;
343 
344     MasterLCPConf::State lcpStateAtTakeOver;
345     Uint32 m_remove_node_from_table_lcp_id;
346   };
347   typedef Ptr<NodeRecord> NodeRecordPtr;
348   /**********************************************************************/
349   /* THIS RECORD KEEPS THE INFORMATION ABOUT A TABLE AND ITS FRAGMENTS  */
350   /**********************************************************************/
351   struct PageRecord {
352     Uint32 word[2048];
353     /* 8 KBYTE PAGE*/
354     Uint32 nextfreepage;
355   };
356   typedef Ptr<PageRecord> PageRecordPtr;
357 
358   /************ REPLICA RECORD *************/
359   /**********************************************************************/
360   /* THIS RECORD KEEPS THE INFORMATION ABOUT A REPLICA OF A FRAGMENT    */
361   /**********************************************************************/
362   struct ReplicaRecord {
363     /* -------------------------------------------------------------------- */
364     /* THE GLOBAL CHECKPOINT IDENTITY WHEN THIS REPLICA WAS CREATED.        */
365     /* THERE IS ONE INDEX PER REPLICA. A REPLICA INDEX IS CREATED WHEN ANODE*/
366     /* CRASH OCCURS.                                                        */
367     /* -------------------------------------------------------------------- */
368     Uint32 createGci[8];
369     /* -------------------------------------------------------------------- */
370     /* THE LAST GLOBAL CHECKPOINT IDENTITY WHICH HAS BEEN SAVED ON DISK.    */
371     /* THIS VARIABLE IS ONLY VALID FOR REPLICAS WHICH HAVE "DIED". A REPLICA*/
372     /* "DIES" EITHER WHEN THE NODE CRASHES THAT KEPT THE REPLICA OR BY BEING*/
373     /* STOPPED IN A CONTROLLED MANNER.                                      */
374     /* THERE IS ONE INDEX PER REPLICA. A REPLICA INDEX IS CREATED WHEN ANODE*/
375     /* CRASH OCCURS.                                                        */
376     /* -------------------------------------------------------------------- */
377     Uint32 replicaLastGci[8];
378     /* -------------------------------------------------------------------- */
379     /* THE LOCAL CHECKPOINT IDENTITY OF A LOCAL CHECKPOINT.                 */
380     /* -------------------------------------------------------------------- */
381     Uint32 lcpId[MAX_LCP_STORED];
382     /* -------------------------------------------------------------------- */
383     /* THIS VARIABLE KEEPS TRACK OF THE MAXIMUM GLOBAL CHECKPOINT COMPLETED */
384     /* FOR EACH OF THE LOCAL CHECKPOINTS IN THIS FRAGMENT REPLICA.          */
385     /* -------------------------------------------------------------------- */
386     Uint32 maxGciCompleted[MAX_LCP_STORED];
387     /* -------------------------------------------------------------------- */
388     /* THIS VARIABLE KEEPS TRACK OF THE MINIMUM GLOBAL CHECKPOINT STARTEDFOR*/
389     /* EACH OF THE LOCAL CHECKPOINTS IN THIS FRAGMENT REPLICA.              */
390     /* -------------------------------------------------------------------- */
391     Uint32 maxGciStarted[MAX_LCP_STORED];
392     /* -------------------------------------------------------------------- */
393     /* THE GLOBAL CHECKPOINT IDENTITY WHEN THE TABLE WAS CREATED.           */
394     /* -------------------------------------------------------------------- */
395     Uint32 initialGci;
396 
397     /* -------------------------------------------------------------------- */
398     /* THE REFERENCE TO THE NEXT REPLICA. EITHER IT REFERS TO THE NEXT IN   */
399     /* THE FREE LIST OR IT REFERS TO THE NEXT IN A LIST OF REPLICAS ON A    */
400     /* FRAGMENT.                                                            */
401     /* -------------------------------------------------------------------- */
402     Uint32 nextReplica;
403 
404     /* -------------------------------------------------------------------- */
405     /*       THE NODE ID WHERE THIS REPLICA IS STORED.                      */
406     /* -------------------------------------------------------------------- */
407     Uint16 procNode;
408 
409     /* -------------------------------------------------------------------- */
410     /*    The last local checkpoint id started or queued on this replica.   */
411     /* -------------------------------------------------------------------- */
412     union {
413       Uint32 lcpIdStarted;   // Started or queued
414       Uint32 m_restorable_gci;
415     };
416 
417     /* -------------------------------------------------------------------- */
418     /* THIS VARIABLE SPECIFIES WHAT THE STATUS OF THE LOCAL CHECKPOINT IS.IT*/
419     /* CAN EITHER BE VALID OR INVALID. AT CREATION OF A FRAGMENT REPLICA ALL*/
420     /* LCP'S ARE INVALID. ALSO IF IF INDEX >= NO_LCP THEN THELOCALCHECKPOINT*/
421     /* IS ALWAYS INVALID. IF THE LCP BEFORE THE NEXT_LCP HAS LCP_ID THAT    */
422     /* DIFFERS FROM THE LATEST LCP_ID STARTED THEN THE NEXT_LCP IS ALSO     */
423     /* INVALID */
424     /* -------------------------------------------------------------------- */
425     Uint8 lcpStatus[MAX_LCP_STORED];
426 
427     /* -------------------------------------------------------------------- */
428     /*       THE NEXT LOCAL CHECKPOINT TO EXECUTE IN THIS FRAGMENT REPLICA. */
429     /* -------------------------------------------------------------------- */
430     Uint8 nextLcp;
431 
432     /* -------------------------------------------------------------------- */
433     /*       THE NUMBER OF CRASHED REPLICAS IN THIS REPLICAS SO FAR.        */
434     /* -------------------------------------------------------------------- */
435     Uint8 noCrashedReplicas;
436 
437     /**
438      * Is a LCP currently ongoing on fragment
439      */
440     Uint8 lcpOngoingFlag;
441   };
442   typedef Ptr<ReplicaRecord> ReplicaRecordPtr;
443 
444   /*************************************************************************
445    * TAB_DESCRIPTOR IS A DESCRIPTOR OF THE LOCATION OF THE FRAGMENTS BELONGING
446    * TO THE TABLE.THE INFORMATION ABOUT FRAGMENTS OF A TABLE ARE STORED IN
447    * CHUNKS OF FRAGMENTSTORE RECORDS.
448    * THIS RECORD ALSO HAS THE NECESSARY INFORMATION TO LOCATE A FRAGMENT AND
449    * TO LOCATE A FRAGMENT AND TO TRANSLATE A KEY OF A TUPLE TO THE FRAGMENT IT
450    * BELONGS
451    */
452   struct TabRecord
453   {
TabRecordDbdih::TabRecord454     TabRecord() { }
455 
456     /**
457      * rw-lock that protects multiple parallel DIGETNODES (readers) from
458      *   updates to fragmenation changes (e.g CREATE_FRAGREQ)...
459      *   search for DIH_TAB_WRITE_LOCK
460      */
461     NdbSeqLock m_lock;
462 
463     /**
464      * State for copying table description into pages
465      */
466     enum CopyStatus {
467       CS_IDLE,
468       CS_SR_PHASE1_READ_PAGES,
469       CS_SR_PHASE2_READ_TABLE,
470       CS_SR_PHASE3_COPY_TABLE,
471       CS_REMOVE_NODE,
472       CS_LCP_READ_TABLE,
473       CS_COPY_TAB_REQ,
474       CS_COPY_NODE_STATE,
475       CS_ADD_TABLE_MASTER,
476       CS_ADD_TABLE_SLAVE,
477       CS_INVALIDATE_NODE_LCP,
478       CS_ALTER_TABLE,
479       CS_COPY_TO_SAVE
480       ,CS_GET_TABINFO
481     };
482     /**
483      * State for copying pages to disk
484      */
485     enum UpdateState {
486       US_IDLE,
487       US_LOCAL_CHECKPOINT,
488       US_REMOVE_NODE,
489       US_COPY_TAB_REQ,
490       US_ADD_TABLE_MASTER,
491       US_ADD_TABLE_SLAVE,
492       US_INVALIDATE_NODE_LCP,
493       US_CALLBACK
494     };
495     enum TabLcpStatus {
496       TLS_ACTIVE = 1,
497       TLS_WRITING_TO_FILE = 2,
498       TLS_COMPLETED = 3
499     };
500     enum TabStatus {
501       TS_IDLE = 0,
502       TS_ACTIVE = 1,
503       TS_CREATING = 2,
504       TS_DROPPING = 3
505     };
506     enum Method {
507       LINEAR_HASH = 0,
508       NOTDEFINED = 1,
509       NORMAL_HASH = 2,
510       USER_DEFINED = 3,
511       HASH_MAP = 4
512     };
513     enum Storage {
514       ST_NOLOGGING = 0,         // Table is not logged, but survives SR
515       ST_NORMAL = 1,            // Normal table, logged and durable
516       ST_TEMPORARY = 2          // Table is lost after SR, not logged
517     };
518     CopyStatus tabCopyStatus;
519     UpdateState tabUpdateState;
520     TabLcpStatus tabLcpStatus;
521     TabStatus tabStatus;
522     Method method;
523     Storage tabStorage;
524 
525     Uint32 pageRef[32];
526 //-----------------------------------------------------------------------------
527 // Each entry in this array contains a reference to 16 fragment records in a
528 // row. Thus finding the correct record is very quick provided the fragment id.
529 //-----------------------------------------------------------------------------
530     Uint32 startFid[MAX_NDB_NODES * MAX_FRAG_PER_NODE / NO_OF_FRAGS_PER_CHUNK];
531 
532     Uint32 tabFile[2];
533     Uint32 connectrec;
534     union {
535       Uint32 hashpointer;
536       Uint32 m_new_map_ptr_i;
537     };
538     union {
539       Uint32 mask;
540       Uint32 m_map_ptr_i;
541     };
542     Uint32 noOfWords;
543     Uint32 schemaVersion;
544     Uint32 tabRemoveNode;
545     Uint32 totalfragments;
546     Uint32 noOfFragChunks;
547     Uint32 m_scan_count[2];
548     Uint32 m_scan_reorg_flag;
549     Uint32 tabErrorCode;
550     struct {
551       Uint32 tabUserRef;
552       Uint32 tabUserPtr;
553     } m_dropTab;
554 
555     Uint8 kvalue;
556     Uint8 noOfBackups;
557     Uint8 noPages;
558     Uint16 tableType;
559     Uint16 primaryTableId;
560 
561     // set in local protocol during prepare until commit
562     Uint32 schemaTransId;
563   };
564   typedef Ptr<TabRecord> TabRecordPtr;
565 
566   /***************************************************************************/
567   /* THIS RECORD IS USED TO KEEP TRACK OF TAKE OVER AND STARTING A NODE.    */
568   /* WE KEEP IT IN A RECORD TO ENABLE IT TO BE PARALLELISED IN THE FUTURE.  */
569   /**************************************************************************/
570   struct TakeOverRecord {
571 
572     /**
573      * States possible on slave (starting node)
574      */
575     enum ToSlaveStatus {
576       TO_SLAVE_IDLE = 0
577       ,TO_START_FRAGMENTS = 1      // Finding LCP for each fragment
578       ,TO_RUN_REDO = 2             // Waiting for local LQH to run REDO
579       ,TO_START_TO = 3             // Waiting for master (START_TOREQ)
580       ,TO_SELECTING_NEXT = 4       // Selecting next fragment to copy
581       ,TO_PREPARE_COPY = 5         // Waiting for local LQH (PREPARE_COPYREQ)
582       ,TO_UPDATE_BEFORE_STORED = 6 // Waiting on master (UPDATE_TOREQ)
583       ,TO_CREATE_FRAG_STORED = 7   // Waiting for all (CREATE_FRAGREQ stored)
584       ,TO_UPDATE_AFTER_STORED = 8  // Waiting for master (UPDATE_TOREQ)
585       ,TO_COPY_FRAG = 9            // Waiting for copy node (COPY_FRAGREQ)
586       ,TO_COPY_ACTIVE = 10         // Waiting for local LQH (COPY_ACTIVEREQ)
587       ,TO_UPDATE_BEFORE_COMMIT = 11// Waiting for master (UPDATE_TOREQ)
588       ,TO_CREATE_FRAG_COMMIT = 12  // Waiting for all (CREATE_FRAGREQ commit)
589       ,TO_UPDATE_AFTER_COMMIT = 13 // Waiting for master (UPDATE_TOREQ)
590 
591       ,TO_START_LOGGING = 14        // Enabling logging on all fragments
592       ,TO_SL_COPY_ACTIVE = 15       // Start logging: Copy active (local)
593       ,TO_SL_CREATE_FRAG = 16       // Start logging: Create Frag (dist)
594       ,TO_END_TO = 17               // Waiting for master (EBND_TOREQ)
595     };
596 
597     /**
598      * States possible on master
599      */
600     enum ToMasterStatus {
601       TO_MASTER_IDLE = 0
602       ,TO_MUTEX_BEFORE_STORED = 1  // Waiting for lock
603       ,TO_MUTEX_BEFORE_LOCKED = 2  // Lock held
604       ,TO_AFTER_STORED = 3         // No lock, but NGPtr reservation
605       ,TO_MUTEX_BEFORE_COMMIT = 4  // Waiting for lock
606       ,TO_MUTEX_BEFORE_SWITCH_REPLICA = 5 // Waiting for switch replica lock
607       ,TO_MUTEX_AFTER_SWITCH_REPLICA = 6
608       ,TO_WAIT_LCP = 7             // No locks, waiting for LCP
609     };
610 
611     Uint32 m_flags;       //
612     Uint32 m_senderRef;   // Who requested START_COPYREQ
613     Uint32 m_senderData;  // Data of sender
614 
615     Uint32 restorableGci; // Which GCI can be restore "locally" by node
616     Uint32 startGci;
617     Uint32 maxPage;
618     Uint32 toCopyNode;
619     Uint32 toCurrentFragid;
620     Uint32 toCurrentReplica;
621     Uint32 toCurrentTabref;
622     Uint32 toFailedNode;
623     Uint32 toStartingNode;
624     Uint64 toStartTime;
625     ToSlaveStatus toSlaveStatus;
626     ToMasterStatus toMasterStatus;
627 
628     MutexHandle2<DIH_SWITCH_PRIMARY_MUTEX> m_switchPrimaryMutexHandle;
629     MutexHandle2<DIH_FRAGMENT_INFO> m_fragmentInfoMutex;
630 
631     Uint32 nextList;
632     union {
633       Uint32 prevList;
634       Uint32 nextPool;
635     };
636   };
637   typedef Ptr<TakeOverRecord> TakeOverRecordPtr;
638 
getParam(const char * param,Uint32 * retVal)639   virtual bool getParam(const char * param, Uint32 * retVal) {
640     if (param && strcmp(param, "ActiveMutexes") == 0)
641     {
642       if (retVal)
643       {
644         * retVal = 5 + MAX_NDB_NODES;
645       }
646       return true;
647     }
648     return false;
649   }
650 
651 public:
652   Dbdih(Block_context& ctx);
653   virtual ~Dbdih();
654 
655   struct RWFragment {
656     Uint32 pageIndex;
657     Uint32 wordIndex;
658     Uint32 fragId;
659     TabRecordPtr rwfTabPtr;
660     PageRecordPtr rwfPageptr;
661     Uint32 totalfragments;
662   };
663   struct CopyTableNode {
664     Uint32 pageIndex;
665     Uint32 wordIndex;
666     Uint32 noOfWords;
667     TabRecordPtr ctnTabPtr;
668     PageRecordPtr ctnPageptr;
669   };
670 
671 private:
672   friend class SimulatedBlock;
673   BLOCK_DEFINES(Dbdih);
674 
675   void execDUMP_STATE_ORD(Signal *);
676   void execNDB_TAMPER(Signal *);
677   void execDEBUG_SIG(Signal *);
678   void execEMPTY_LCP_CONF(Signal *);
679   void execEMPTY_LCP_REP(Signal*);
680   void execMASTER_GCPREF(Signal *);
681   void execMASTER_GCPREQ(Signal *);
682   void execMASTER_GCPCONF(Signal *);
683   void execMASTER_LCPREF(Signal *);
684   void execMASTER_LCPREQ(Signal *);
685   void execMASTER_LCPCONF(Signal *);
686   void execNF_COMPLETEREP(Signal *);
687   void execSTART_PERMREQ(Signal *);
688   void execSTART_PERMCONF(Signal *);
689   void execSTART_PERMREF(Signal *);
690   void execINCL_NODEREQ(Signal *);
691   void execINCL_NODECONF(Signal *);
692 
693   void execSTART_TOREQ(Signal *);
694   void execSTART_TOREF(Signal *);
695   void execSTART_TOCONF(Signal*);
696 
697   void execEND_TOREQ(Signal *);
698   void execEND_TOREF(Signal *);
699   void execEND_TOCONF(Signal*);
700 
701   void execUPDATE_TOREQ(Signal* signal);
702   void execUPDATE_TOREF(Signal* signal);
703   void execUPDATE_TOCONF(Signal* signal);
704 
705   void execSTART_MEREQ(Signal *);
706   void execSTART_MECONF(Signal *);
707   void execSTART_MEREF(Signal *);
708   void execSTART_COPYREQ(Signal *);
709   void execSTART_COPYCONF(Signal *);
710   void execSTART_COPYREF(Signal *);
711   void execCREATE_FRAGREQ(Signal *);
712   void execCREATE_FRAGCONF(Signal *);
713   void execDIVERIFYREQ(Signal *);
714   void execGCP_SAVEREQ(Signal *);
715   void execGCP_SAVECONF(Signal *);
716   void execGCP_PREPARECONF(Signal *);
717   void execGCP_PREPARE(Signal *);
718   void execGCP_NODEFINISH(Signal *);
719   void execGCP_COMMIT(Signal *);
720   void execSUB_GCP_COMPLETE_REP(Signal *);
721   void execSUB_GCP_COMPLETE_ACK(Signal *);
722   void execDIHNDBTAMPER(Signal *);
723   void execCONTINUEB(Signal *);
724   void execCOPY_GCIREQ(Signal *);
725   void execCOPY_GCICONF(Signal *);
726   void execCOPY_TABREQ(Signal *);
727   void execCOPY_TABCONF(Signal *);
728   void execTCGETOPSIZECONF(Signal *);
729   void execTC_CLOPSIZECONF(Signal *);
730 
731   void execDIH_GET_TABINFO_REQ(Signal*);
732 
733   int handle_invalid_lcp_no(const struct LcpFragRep*, ReplicaRecordPtr);
734   void execLCP_FRAG_REP(Signal *);
735   void execLCP_COMPLETE_REP(Signal *);
736   void execSTART_LCP_REQ(Signal *);
737   void execSTART_LCP_CONF(Signal *);
738   MutexHandle2<DIH_START_LCP_MUTEX> c_startLcpMutexHandle;
739   void startLcpMutex_locked(Signal* signal, Uint32, Uint32);
740   void startLcpMutex_unlocked(Signal* signal, Uint32, Uint32);
741   void lcpFragmentMutex_locked(Signal* signal, Uint32, Uint32);
742   void master_lcp_fragmentMutex_locked(Signal* signal, Uint32, Uint32);
743 
744   MutexHandle2<DIH_SWITCH_PRIMARY_MUTEX> c_switchPrimaryMutexHandle;
745   void switchPrimaryMutex_locked(Signal* signal, Uint32, Uint32);
746   void switchPrimaryMutex_unlocked(Signal* signal, Uint32, Uint32);
747   void check_force_lcp(Ptr<TakeOverRecord> takeOverPtr);
748 
749   void switch_primary_stop_node(Signal* signal, Uint32, Uint32);
750 
751   void updateToReq_fragmentMutex_locked(Signal*, Uint32, Uint32);
752 
753   MutexHandle2<DIH_FRAGMENT_INFO> c_fragmentInfoMutex_lcp;
754 
755   void execBLOCK_COMMIT_ORD(Signal *);
756   void execUNBLOCK_COMMIT_ORD(Signal *);
757 
758   void execDIH_SWITCH_REPLICA_REQ(Signal *);
759   void execDIH_SWITCH_REPLICA_REF(Signal *);
760   void execDIH_SWITCH_REPLICA_CONF(Signal *);
761 
762   void execSTOP_PERM_REQ(Signal *);
763   void execSTOP_PERM_REF(Signal *);
764   void execSTOP_PERM_CONF(Signal *);
765 
766   void execSTOP_ME_REQ(Signal *);
767   void execSTOP_ME_REF(Signal *);
768   void execSTOP_ME_CONF(Signal *);
769 
770   void execREAD_CONFIG_REQ(Signal *);
771   void execUNBLO_DICTCONF(Signal *);
772   void execCOPY_ACTIVECONF(Signal *);
773   void execTAB_COMMITREQ(Signal *);
774   void execNODE_FAILREP(Signal *);
775   void execCOPY_FRAGCONF(Signal *);
776   void execCOPY_FRAGREF(Signal *);
777   void execPREPARE_COPY_FRAG_REF(Signal*);
778   void execPREPARE_COPY_FRAG_CONF(Signal*);
779   void execDIADDTABREQ(Signal *);
780   void execDIGETNODESREQ(Signal *);
781   void execSTTOR(Signal *);
782   void execDIH_SCAN_TAB_REQ(Signal *);
783   void execDIH_SCAN_GET_NODES_REQ(Signal *);
784   void execDIH_SCAN_TAB_COMPLETE_REP(Signal*);
785   void execGCP_SAVEREF(Signal *);
786   void execGCP_TCFINISHED(Signal *);
787   void execGCP_TCFINISHED_sync_conf(Signal* signal, Uint32 cb, Uint32 err);
788   void execREAD_NODESCONF(Signal *);
789   void execNDB_STTOR(Signal *);
790   void execDICTSTARTCONF(Signal *);
791   void execNDB_STARTREQ(Signal *);
792   void execGETGCIREQ(Signal *);
793   void execDIH_RESTARTREQ(Signal *);
794   void execSTART_RECCONF(Signal *);
795   void execSTART_FRAGREF(Signal *);
796   void execSTART_FRAGCONF(Signal *);
797   void execADD_FRAGCONF(Signal *);
798   void execADD_FRAGREF(Signal *);
799   void execDROP_FRAG_REF(Signal *);
800   void execDROP_FRAG_CONF(Signal *);
801   void execFSOPENCONF(Signal *);
802   void execFSOPENREF(Signal *);
803   void execFSCLOSECONF(Signal *);
804   void execFSCLOSEREF(Signal *);
805   void execFSREADCONF(Signal *);
806   void execFSREADREF(Signal *);
807   void execFSWRITECONF(Signal *);
808   void execFSWRITEREF(Signal *);
809   void execCHECKNODEGROUPSREQ(Signal *);
810   void execSTART_INFOREQ(Signal*);
811   void execSTART_INFOREF(Signal*);
812   void execSTART_INFOCONF(Signal*);
813   void execWAIT_GCP_REQ(Signal* signal);
814   void execWAIT_GCP_REF(Signal* signal);
815   void execWAIT_GCP_CONF(Signal* signal);
816 
817   void execPREP_DROP_TAB_REQ(Signal* signal);
818   void execDROP_TAB_REQ(Signal* signal);
819 
820   void execALTER_TAB_REQ(Signal* signal);
821 
822   void execCREATE_FRAGMENTATION_REQ(Signal*);
823 
824   void waitDropTabWritingToFile(Signal *, TabRecordPtr tabPtr);
825   void checkDropTabComplete(Signal *, TabRecordPtr tabPtr);
826 
827   void execDICT_LOCK_CONF(Signal* signal);
828   void execDICT_LOCK_REF(Signal* signal);
829 
830   void execUPGRADE_PROTOCOL_ORD(Signal* signal);
831 
832   void execCREATE_NODEGROUP_IMPL_REQ(Signal*);
833   void execDROP_NODEGROUP_IMPL_REQ(Signal*);
834 
835   // Statement blocks
836 //------------------------------------
837 // Methods that send signals
838 //------------------------------------
839   void nullRoutine(Signal *, Uint32 nodeId, Uint32);
840   void sendCOPY_GCIREQ(Signal *, Uint32 nodeId, Uint32);
841   void sendDIH_SWITCH_REPLICA_REQ(Signal *, Uint32 nodeId, Uint32);
842   void sendEMPTY_LCP_REQ(Signal *, Uint32 nodeId, Uint32);
843   void sendEND_TOREQ(Signal *, Uint32 nodeId, Uint32);
844   void sendGCP_COMMIT(Signal *, Uint32 nodeId, Uint32);
845   void sendGCP_PREPARE(Signal *, Uint32 nodeId, Uint32);
846   void sendGCP_SAVEREQ(Signal *, Uint32 nodeId, Uint32);
847   void sendSUB_GCP_COMPLETE_REP(Signal*, Uint32 nodeId, Uint32);
848   void sendINCL_NODEREQ(Signal *, Uint32 nodeId, Uint32);
849   void sendMASTER_GCPREQ(Signal *, Uint32 nodeId, Uint32);
850   void sendMASTER_LCPREQ(Signal *, Uint32 nodeId, Uint32);
851   void sendMASTER_LCPCONF(Signal * signal);
852   void sendSTART_RECREQ(Signal *, Uint32 nodeId, Uint32);
853   void sendSTART_INFOREQ(Signal *, Uint32 nodeId, Uint32);
854   void sendSTART_TOREQ(Signal *, Uint32 nodeId, Uint32);
855   void sendSTOP_ME_REQ(Signal *, Uint32 nodeId, Uint32);
856   void sendTC_CLOPSIZEREQ(Signal *, Uint32 nodeId, Uint32);
857   void sendTCGETOPSIZEREQ(Signal *, Uint32 nodeId, Uint32);
858   void sendUPDATE_TOREQ(Signal *, Uint32 nodeId, Uint32);
859   void sendSTART_LCP_REQ(Signal *, Uint32 nodeId, Uint32);
860 
861   void sendLCP_FRAG_ORD(Signal*, NodeRecord::FragmentCheckpointInfo info);
862   void sendLastLCP_FRAG_ORD(Signal *);
863 
864   void sendCopyTable(Signal *, CopyTableNode* ctn,
865                      BlockReference ref, Uint32 reqinfo);
866   void sendCreateFragReq(Signal *,
867                          Uint32 startGci,
868                          Uint32 storedType,
869                          Uint32 takeOverPtr);
870   void sendDihfragreq(Signal *,
871                       TabRecordPtr regTabPtr,
872                       Uint32 fragId);
873 
874   void sendStartTo(Signal* signal, TakeOverRecordPtr);
875   void sendUpdateTo(Signal* signal, TakeOverRecordPtr);
876 
877   void sendStartFragreq(Signal *,
878                         TabRecordPtr regTabPtr,
879                         Uint32 fragId);
880   void sendAddFragreq(Signal *,
881                       TabRecordPtr regTabPtr,
882                       Uint32 fragId,
883                       Uint32 lcpNo,
884                       Uint32 param);
885 
886   void sendAddFragreq(Signal*, ConnectRecordPtr, TabRecordPtr, Uint32 fragId);
887   void addTable_closeConf(Signal* signal, Uint32 tabPtrI);
888   void resetReplicaSr(TabRecordPtr tabPtr);
889   void resetReplicaLcp(ReplicaRecord * replicaP, Uint32 stopGci);
890   void resetReplica(Ptr<ReplicaRecord>);
891 
892 //------------------------------------
893 // Methods for LCP functionality
894 //------------------------------------
895   void checkKeepGci(TabRecordPtr, Uint32, Fragmentstore*, Uint32);
896   void checkLcpStart(Signal *, Uint32 lineNo);
897   void checkStartMoreLcp(Signal *, Uint32 nodeId);
898   bool reportLcpCompletion(const struct LcpFragRep *);
899   void sendLCP_COMPLETE_REP(Signal *);
900 
901 //------------------------------------
902 // Methods for Delete Table Files
903 //------------------------------------
904   void startDeleteFile(Signal* signal, TabRecordPtr tabPtr);
905   void openTableFileForDelete(Signal* signal, Uint32 fileIndex);
906   void tableOpenLab(Signal* signal, FileRecordPtr regFilePtr);
907   void tableDeleteLab(Signal* signal, FileRecordPtr regFilePtr);
908 
909 //------------------------------------
910 // File Record specific methods
911 //------------------------------------
912   void closeFile(Signal *, FileRecordPtr regFilePtr);
913   void closeFileDelete(Signal *, FileRecordPtr regFilePtr);
914   void createFileRw(Signal *, FileRecordPtr regFilePtr);
915   void openFileRw(Signal *, FileRecordPtr regFilePtr);
916   void openFileRo(Signal *, FileRecordPtr regFilePtr);
917   void seizeFile(FileRecordPtr& regFilePtr);
918   void releaseFile(Uint32 fileIndex);
919 
920 //------------------------------------
921 // Methods called when completing file
922 // operation.
923 //------------------------------------
924   void creatingGcpLab(Signal *, FileRecordPtr regFilePtr);
925   void openingGcpLab(Signal *, FileRecordPtr regFilePtr);
926   void openingTableLab(Signal *, FileRecordPtr regFilePtr);
927   void tableCreateLab(Signal *, FileRecordPtr regFilePtr);
928   void creatingGcpErrorLab(Signal *, FileRecordPtr regFilePtr);
929   void openingCopyGciErrorLab(Signal *, FileRecordPtr regFilePtr);
930   void creatingCopyGciErrorLab(Signal *, FileRecordPtr regFilePtr);
931   void openingGcpErrorLab(Signal *, FileRecordPtr regFilePtr);
932   void openingTableErrorLab(Signal *, FileRecordPtr regFilePtr);
933   void tableCreateErrorLab(Signal *, FileRecordPtr regFilePtr);
934   void closingGcpLab(Signal *, FileRecordPtr regFilePtr);
935   void closingGcpCrashLab(Signal *, FileRecordPtr regFilePtr);
936   void closingTableCrashLab(Signal *, FileRecordPtr regFilePtr);
937   void closingTableSrLab(Signal *, FileRecordPtr regFilePtr);
938   void tableCloseLab(Signal *, FileRecordPtr regFilePtr);
939   void tableCloseErrorLab(FileRecordPtr regFilePtr);
940   void readingGcpLab(Signal *, FileRecordPtr regFilePtr);
941   void readingTableLab(Signal *, FileRecordPtr regFilePtr);
942   void readingGcpErrorLab(Signal *, FileRecordPtr regFilePtr);
943   void readingTableErrorLab(Signal *, FileRecordPtr regFilePtr);
944   void writingCopyGciLab(Signal *, FileRecordPtr regFilePtr);
945   void writeInitGcpLab(Signal *, FileRecordPtr regFilePtr);
946   void tableWriteLab(Signal *, FileRecordPtr regFilePtr);
947   void writeInitGcpErrorLab(Signal *, FileRecordPtr regFilePtr);
948 
949 
950   void checkEscalation();
951   void clearRestartInfoBits(Signal *);
952   void invalidateLcpInfoAfterSr(Signal*);
953 
954   bool isMaster();
955   bool isActiveMaster();
956 
957   void handleGcpStateInMaster(Signal *, NodeRecordPtr failedNodeptr);
958   void initRestartInfo(Signal*);
959   void initRestorableGciFiles();
960   void makeNodeGroups(Uint32 nodeArray[]);
961   void add_nodegroup(NodeGroupRecordPtr);
962   void inc_ng_refcount(Uint32 ng);
963   void dec_ng_refcount(Uint32 ng);
964 
965   void makePrnList(class ReadNodesConf * readNodes, Uint32 nodeArray[]);
966   void nodeResetStart(Signal* signal);
967   void releaseTabPages(Uint32 tableId);
968   void replication(Uint32 noOfReplicas,
969                    NodeGroupRecordPtr NGPtr,
970                    FragmentstorePtr regFragptr);
971   void sendDihRestartRef(Signal*);
972   void selectMasterCandidateAndSend(Signal *);
973   void setLcpActiveStatusEnd(Signal*);
974   void setLcpActiveStatusStart(Signal *);
975   void setNodeActiveStatus();
976   void setNodeGroups();
977   void setNodeInfo(Signal *);
978   void setNodeLcpActiveStatus();
979   void setNodeRestartInfoBits(Signal*);
980   void startGcp(Signal *);
981   void startGcpMonitor(Signal*);
982 
983   void readFragment(RWFragment* rf, FragmentstorePtr regFragptr);
984   Uint32 readPageWord(RWFragment* rf);
985   void readReplica(RWFragment* rf, ReplicaRecordPtr readReplicaPtr);
986   void readReplicas(RWFragment* rf, FragmentstorePtr regFragptr);
987   void readRestorableGci(Signal *, FileRecordPtr regFilePtr);
988   void readTabfile(Signal *, TabRecord* tab, FileRecordPtr regFilePtr);
989   void writeFragment(RWFragment* wf, FragmentstorePtr regFragptr);
990   void writePageWord(RWFragment* wf, Uint32 dataWord);
991   void writeReplicas(RWFragment* wf, Uint32 replicaStartIndex);
992   void writeRestorableGci(Signal *, FileRecordPtr regFilePtr);
993   void writeTabfile(Signal *, TabRecord* tab, FileRecordPtr regFilePtr);
994   void copyTabReq_complete(Signal* signal, TabRecordPtr tabPtr);
995 
996   void gcpcommitreqLab(Signal *);
997   void copyGciLab(Signal *, CopyGCIReq::CopyReason reason);
998   void storeNewLcpIdLab(Signal *);
999   void startLcpRoundLoopLab(Signal *, Uint32 startTableId, Uint32 startFragId);
1000 
1001   void nodeFailCompletedCheckLab(Signal*, NodeRecordPtr failedNodePtr);
1002 
1003   /**
1004    *
1005    */
1006   void setLocalNodefailHandling(Signal*, Uint32 failedNodeId,
1007 				NodefailHandlingStep step);
1008   void checkLocalNodefailComplete(Signal*, Uint32 failedNodeId,
1009 				  NodefailHandlingStep step);
1010 
1011   Callback m_sendSTTORRY;
1012   void sendSTTORRY(Signal*, Uint32 senderData = 0, Uint32 retVal = 0);
1013   void ndbsttorry10Lab(Signal *, Uint32 _line);
1014   void createMutexes(Signal* signal, Uint32 no);
1015   void createMutex_done(Signal* signal, Uint32 no, Uint32 retVal);
1016   void dumpGcpStop();
1017   void crashSystemAtGcpStop(Signal *, bool);
1018   void sendFirstDictfragsreq(Signal *, TabRecordPtr regTabPtr);
1019   void addtabrefuseLab(Signal *, ConnectRecordPtr regConnectPtr, Uint32 errorCode);
1020   void GCP_SAVEhandling(Signal *, Uint32 nodeId);
1021   void packTableIntoPagesLab(Signal *, Uint32 tableId);
1022   void readPagesIntoTableLab(Signal *, Uint32 tableId);
1023   void readPagesIntoFragLab(Signal *, RWFragment* rf);
1024   void readTabDescriptionLab(Signal *, Uint32 tableId);
1025   void copyTableLab(Signal *, Uint32 tableId);
1026   void breakCopyTableLab(Signal *,
1027                          TabRecordPtr regTabPtr,
1028                          Uint32 nodeId);
1029   void checkAddfragCompletedLab(Signal *,
1030                                 TabRecordPtr regTabPtr,
1031                                 Uint32 fragId);
1032   void completeRestartLab(Signal *);
1033   void readTableFromPagesLab(Signal *, TabRecordPtr regTabPtr);
1034   void srPhase2ReadTableLab(Signal *, TabRecordPtr regTabPtr);
1035   void checkTcCounterLab(Signal *);
1036   void calculateKeepGciLab(Signal *, Uint32 tableId, Uint32 fragId);
1037   void tableUpdateLab(Signal *, TabRecordPtr regTabPtr);
1038   void checkLcpCompletedLab(Signal *);
1039   void initLcpLab(Signal *, Uint32 masterRef, Uint32 tableId);
1040   void startGcpLab(Signal *, Uint32 aWaitTime);
1041   void checkGcpStopLab(Signal *);
1042   void MASTER_GCPhandling(Signal *, Uint32 failedNodeId);
1043   void MASTER_LCPhandling(Signal *, Uint32 failedNodeId);
1044   void rnfTableNotReadyLab(Signal *, TabRecordPtr regTabPtr, Uint32 removeNodeId);
1045   void startLcpTakeOverLab(Signal *, Uint32 failedNodeId);
1046 
1047   void startLcpMasterTakeOver(Signal *, Uint32 failedNodeId);
1048   void startGcpMasterTakeOver(Signal *, Uint32 failedNodeId);
1049   void checkGcpOutstanding(Signal*, Uint32 failedNodeId);
1050 
1051   void checkEmptyLcpComplete(Signal *);
1052   void lcpBlockedLab(Signal *, Uint32, Uint32);
1053   void breakCheckTabCompletedLab(Signal *, TabRecordPtr regTabptr);
1054   void readGciFileLab(Signal *);
1055   void openingCopyGciSkipInitLab(Signal *, FileRecordPtr regFilePtr);
1056   void startLcpRoundLab(Signal *);
1057   void gcpBlockedLab(Signal *);
1058   void initialStartCompletedLab(Signal *);
1059   void allNodesLcpCompletedLab(Signal *);
1060   void nodeRestartPh2Lab(Signal *);
1061   void nodeRestartPh2Lab2(Signal *);
1062   void initGciFilesLab(Signal *);
1063   void dictStartConfLab(Signal *);
1064   void nodeDictStartConfLab(Signal *);
1065   void ndbStartReqLab(Signal *, BlockReference ref);
1066   void nodeRestartStartRecConfLab(Signal *);
1067   void dihCopyCompletedLab(Signal *);
1068   void release_connect(ConnectRecordPtr ptr);
1069   void copyTableNode(Signal *,
1070                      CopyTableNode* ctn,
1071                      NodeRecordPtr regNodePtr);
1072   void startFragment(Signal *, Uint32 tableId, Uint32 fragId);
1073   bool checkLcpAllTablesDoneInLqh(Uint32 from);
1074 
1075   void lcpStateAtNodeFailureLab(Signal *, Uint32 nodeId);
1076   void copyNodeLab(Signal *, Uint32 tableId);
1077   void copyGciReqLab(Signal *);
1078   void allLab(Signal *,
1079               ConnectRecordPtr regConnectPtr,
1080               TabRecordPtr regTabPtr);
1081   void tableCopyNodeLab(Signal *, TabRecordPtr regTabPtr);
1082 
1083   void removeNodeFromTables(Signal *, Uint32 tableId, Uint32 nodeId);
1084   void removeNodeFromTable(Signal *, Uint32 tableId, TabRecordPtr tabPtr);
1085   void removeNodeFromTablesComplete(Signal* signal, Uint32 nodeId);
1086 
1087   void packFragIntoPagesLab(Signal *, RWFragment* wf);
1088   void startNextChkpt(Signal *);
1089   void failedNodeLcpHandling(Signal*, NodeRecordPtr failedNodePtr);
1090   void failedNodeSynchHandling(Signal *, NodeRecordPtr failedNodePtr);
1091   void checkCopyTab(Signal*, NodeRecordPtr failedNodePtr);
1092 
1093   void initCommonData();
1094   void initialiseRecordsLab(Signal *, Uint32 stepNo, Uint32, Uint32);
1095 
1096   void findReplica(ReplicaRecordPtr& regReplicaPtr,
1097                    Fragmentstore* fragPtrP,
1098 		   Uint32 nodeId,
1099 		   bool oldStoredReplicas = false);
1100 //------------------------------------
1101 // Node failure handling methods
1102 //------------------------------------
1103   void startRemoveFailedNode(Signal *, NodeRecordPtr failedNodePtr);
1104   void handleGcpTakeOver(Signal *, NodeRecordPtr failedNodePtr);
1105   void handleLcpTakeOver(Signal *, NodeRecordPtr failedNodePtr);
1106   void handleNewMaster(Signal *, NodeRecordPtr failedNodePtr);
1107   void handleTakeOver(Signal*, Ptr<TakeOverRecord>);
1108   void handleLcpMasterTakeOver(Signal *, Uint32 nodeId);
1109 
1110 //------------------------------------
1111 // Replica record specific methods
1112 //------------------------------------
1113   Uint32 findLogInterval(ConstPtr<ReplicaRecord> regReplicaPtr,
1114 			 Uint32 startGci);
1115   void findMinGci(ReplicaRecordPtr fmgReplicaPtr,
1116                   Uint32& keeGci,
1117                   Uint32& oldestRestorableGci);
1118   bool findStartGci(ConstPtr<ReplicaRecord> fstReplicaPtr,
1119                     Uint32 tfstStopGci,
1120                     Uint32& tfstStartGci,
1121                     Uint32& tfstLcp);
1122   void newCrashedReplica(ReplicaRecordPtr ncrReplicaPtr);
1123   void packCrashedReplicas(ReplicaRecordPtr pcrReplicaPtr);
1124   void releaseReplicas(Uint32 * replicaPtr);
1125   void removeOldCrashedReplicas(Uint32, Uint32, ReplicaRecordPtr rocReplicaPtr);
1126   void removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr, Uint32 lastCompletedGCI);
1127   void mergeCrashedReplicas(ReplicaRecordPtr pcrReplicaPtr);
1128   void seizeReplicaRec(ReplicaRecordPtr& replicaPtr);
1129 
1130 //------------------------------------
1131 // Methods operating on a fragment and
1132 // its connected replicas and nodes.
1133 //------------------------------------
1134   void allocStoredReplica(FragmentstorePtr regFragptr,
1135                           ReplicaRecordPtr& newReplicaPtr,
1136                           Uint32 nodeId);
1137   Uint32 extractNodeInfo(const Fragmentstore * fragPtr, Uint32 nodes[]);
1138   bool findBestLogNode(CreateReplicaRecord* createReplica,
1139                        FragmentstorePtr regFragptr,
1140                        Uint32 startGci,
1141                        Uint32 stopGci,
1142                        Uint32 logNode,
1143                        Uint32& fblStopGci);
1144   bool findLogNodes(CreateReplicaRecord* createReplica,
1145                     FragmentstorePtr regFragptr,
1146                     Uint32 startGci,
1147                     Uint32 stopGci);
1148   void initFragstore(FragmentstorePtr regFragptr);
1149   void insertBackup(FragmentstorePtr regFragptr, Uint32 nodeId);
1150   void insertfraginfo(FragmentstorePtr regFragptr,
1151                       Uint32 noOfBackups,
1152                       Uint32* nodeArray);
1153   void linkOldStoredReplica(FragmentstorePtr regFragptr,
1154                             ReplicaRecordPtr replicaPtr);
1155   void linkStoredReplica(FragmentstorePtr regFragptr,
1156                          ReplicaRecordPtr replicaPtr);
1157   void prepareReplicas(FragmentstorePtr regFragptr);
1158   void removeNodeFromStored(Uint32 nodeId,
1159                             FragmentstorePtr regFragptr,
1160                             ReplicaRecordPtr replicaPtr,
1161 			    bool temporary);
1162   void removeOldStoredReplica(FragmentstorePtr regFragptr,
1163                               ReplicaRecordPtr replicaPtr);
1164   void removeStoredReplica(FragmentstorePtr regFragptr,
1165                            ReplicaRecordPtr replicaPtr);
1166   void searchStoredReplicas(FragmentstorePtr regFragptr);
1167   bool setup_create_replica(FragmentstorePtr, CreateReplicaRecord*,
1168 			    ConstPtr<ReplicaRecord>);
1169   void updateNodeInfo(FragmentstorePtr regFragptr);
1170 
1171 //------------------------------------
1172 // Fragment allocation, deallocation and
1173 // find methods
1174 //------------------------------------
1175   void allocFragments(Uint32 noOfFragments, TabRecordPtr regTabPtr);
1176   void releaseFragments(TabRecordPtr regTabPtr);
1177   void getFragstore(TabRecord *, Uint32 fragNo, FragmentstorePtr & ptr);
1178   void initialiseFragstore();
1179 
1180   void wait_old_scan(Signal*);
1181   Uint32 add_fragments_to_table(Ptr<TabRecord>, const Uint16 buf[]);
1182   Uint32 add_fragment_to_table(Ptr<TabRecord>, Uint32, Ptr<Fragmentstore>&);
1183 
1184   void drop_fragments(Signal*, ConnectRecordPtr, Uint32 last);
1185   void release_fragment_from_table(Ptr<TabRecord>, Uint32 fragId);
1186   void send_alter_tab_ref(Signal*, Ptr<TabRecord>,Ptr<ConnectRecord>, Uint32);
1187   void send_alter_tab_conf(Signal*, Ptr<ConnectRecord>);
1188   void alter_table_writeTable_conf(Signal* signal, Uint32 ptrI, Uint32 err);
1189   void saveTableFile(Signal*, Ptr<ConnectRecord>, Ptr<TabRecord>,
1190                      TabRecord::CopyStatus, Callback&);
1191 
1192 //------------------------------------
1193 // Page Record specific methods
1194 //------------------------------------
1195   void allocpage(PageRecordPtr& regPagePtr);
1196   void releasePage(Uint32 pageIndex);
1197 
1198 //------------------------------------
1199 // Table Record specific methods
1200 //------------------------------------
1201   void initTable(TabRecordPtr regTabPtr);
1202   void initTableFile(TabRecordPtr regTabPtr);
1203   void releaseTable(TabRecordPtr tabPtr);
1204   bool findTakeOver(Ptr<TakeOverRecord> & ptr, Uint32 failedNodeId);
1205   void handleTakeOverMaster(Signal *, Uint32 takeOverPtr);
1206   void handleTakeOverNewMaster(Signal *, Uint32 takeOverPtr);
1207 
1208 //------------------------------------
1209 // TakeOver Record specific methods
1210 //------------------------------------
1211   void releaseTakeOver(TakeOverRecordPtr);
1212   void abortTakeOver(Signal*, TakeOverRecordPtr);
1213   bool anyActiveTakeOver();
1214   void checkToCopy();
1215   void checkToCopyCompleted(Signal *);
1216   bool checkToInterrupted(TakeOverRecordPtr& regTakeOverptr);
1217   Uint32 getStartNode(Uint32 takeOverPtr);
1218 
1219 //------------------------------------
1220 // Methods for take over functionality
1221 //------------------------------------
1222   void changeNodeGroups(Uint32 startNode, Uint32 nodeTakenOver);
1223   void endTakeOver(Uint32 takeOverPtr);
1224 
1225   void systemRestartTakeOverLab(Signal *);
1226   void startTakeOver(Signal *,
1227                      Uint32 startNode,
1228                      Uint32 toNode,
1229                      const struct StartCopyReq*);
1230   void startNextCopyFragment(Signal *, Uint32 takeOverPtr);
1231   void toCopyFragLab(Signal *, Uint32 takeOverPtr);
1232   void toStartCopyFrag(Signal *, TakeOverRecordPtr);
1233   void startHsAddFragConfLab(Signal *);
1234   void prepareSendCreateFragReq(Signal *, Uint32 takeOverPtr);
1235   void toCopyCompletedLab(Signal *, TakeOverRecordPtr regTakeOverptr);
1236   void takeOverCompleted(Uint32 aNodeId);
1237 
1238 //------------------------------------
1239 // Node Record specific methods
1240 //------------------------------------
1241   void checkStartTakeOver(Signal *);
1242   void insertAlive(NodeRecordPtr newNodePtr);
1243   void insertDeadNode(NodeRecordPtr removeNodePtr);
1244   void removeAlive(NodeRecordPtr removeNodePtr);
1245   void removeDeadNode(NodeRecordPtr removeNodePtr);
1246 
1247   NodeRecord::NodeStatus getNodeStatus(Uint32 nodeId);
1248   void setNodeStatus(Uint32 nodeId, NodeRecord::NodeStatus);
1249   Sysfile::ActiveStatus getNodeActiveStatus(Uint32 nodeId);
1250   void setNodeActiveStatus(Uint32 nodeId, Sysfile::ActiveStatus newStatus);
1251   void setNodeLcpActiveStatus(Uint32 nodeId, bool newState);
1252   bool getNodeLcpActiveStatus(Uint32 nodeId);
1253   bool getAllowNodeStart(Uint32 nodeId);
1254   void setAllowNodeStart(Uint32 nodeId, bool newState);
1255   bool getNodeCopyCompleted(Uint32 nodeId);
1256   void setNodeCopyCompleted(Uint32 nodeId, bool newState);
1257   Uint32 getNodeGroup(Uint32 nodeId) const;
1258   bool checkNodeAlive(Uint32 nodeId);
1259 
1260   void nr_start_fragments(Signal*, TakeOverRecordPtr);
1261   void nr_start_fragment(Signal*, TakeOverRecordPtr, ReplicaRecordPtr);
1262   void nr_run_redo(Signal*, TakeOverRecordPtr);
1263   void nr_start_logging(Signal*, TakeOverRecordPtr);
1264 
1265   void getTabInfo(Signal*);
1266   void getTabInfo_send(Signal*, TabRecordPtr);
1267   void getTabInfo_sendComplete(Signal*, Uint32, Uint32);
1268   int getTabInfo_copyTableToSection(SegmentedSectionPtr & ptr, CopyTableNode);
1269   int getTabInfo_copySectionToPages(TabRecordPtr, SegmentedSectionPtr);
1270 
1271   // Initialisation
1272   void initData();
1273   void initRecords();
1274 
1275   // Variables to support record structures and their free lists
1276 
1277   Uint32 capiConnectFileSize;
1278 
1279   ConnectRecord *connectRecord;
1280   Uint32 cfirstconnect;
1281   Uint32 cconnectFileSize;
1282 
1283   CreateReplicaRecord *createReplicaRecord;
1284   Uint32 cnoOfCreateReplicas;
1285 
1286   FileRecord *fileRecord;
1287   Uint32 cfirstfreeFile;
1288   Uint32 cfileFileSize;
1289 
1290   Fragmentstore *fragmentstore;
1291   Uint32 cfirstfragstore;
1292   Uint32 cfragstoreFileSize;
1293   RSS_OP_SNAPSHOT(cremainingfrags);
1294 
1295   Uint32 c_nextNodeGroup;
1296   NodeGroupRecord *nodeGroupRecord;
1297   RSS_OP_SNAPSHOT(cnghash);
1298 
1299   NodeRecord *nodeRecord;
1300 
1301   PageRecord *pageRecord;
1302   Uint32 cfirstfreepage;
1303   Uint32 cpageFileSize;
1304 
1305   ReplicaRecord *replicaRecord;
1306   Uint32 cfirstfreeReplica;
1307   Uint32 cnoFreeReplicaRec;
1308   Uint32 creplicaFileSize;
1309   RSS_OP_SNAPSHOT(cnoFreeReplicaRec);
1310 
1311   TabRecord *tabRecord;
1312   Uint32 ctabFileSize;
1313 
1314   ArrayPool<TakeOverRecord> c_takeOverPool;
1315   DLList<TakeOverRecord> c_activeTakeOverList;
1316 
1317   /*
1318     2.4  C O M M O N    S T O R E D    V A R I A B L E S
1319     ----------------------------------------------------
1320   */
1321   struct DIVERIFY_queue
1322   {
DIVERIFY_queueDbdih::DIVERIFY_queue1323     DIVERIFY_queue() {
1324       m_ref = 0;
1325       cfirstVerifyQueue = clastVerifyQueue = 0;
1326       apiConnectRecord = 0;
1327       m_empty_done = 1;
1328     }
1329     ApiConnectRecord *apiConnectRecord;
1330     Uint32 cfirstVerifyQueue;
1331     Uint32 clastVerifyQueue;
1332     Uint32 m_empty_done;
1333     Uint32 m_ref;
1334   };
1335 
1336   bool isEmpty(const DIVERIFY_queue&);
1337   void enqueue(DIVERIFY_queue&, Uint32 senderData, Uint64 gci);
1338   void dequeue(DIVERIFY_queue&, ApiConnectRecord &);
1339   void emptyverificbuffer(Signal *, Uint32 q, bool aContintueB);
1340   void emptyverificbuffer_check(Signal*, Uint32, Uint32);
1341 
1342   DIVERIFY_queue c_diverify_queue[MAX_NDBMT_LQH_THREADS];
1343   Uint32 c_diverify_queue_cnt;
1344 
1345   /*------------------------------------------------------------------------*/
1346   /*       THIS VARIABLE KEEPS THE REFERENCES TO FILE RECORDS THAT DESCRIBE */
1347   /*       THE TWO FILES THAT ARE USED TO STORE THE VARIABLE CRESTART_INFO  */
1348   /*       ON DISK.                                                         */
1349   /*------------------------------------------------------------------------*/
1350   Uint32 crestartInfoFile[2];
1351 
1352   bool cgckptflag;    /* A FLAG WHICH IS SET WHILE A NEW GLOBAL CHECK
1353                            POINT IS BEING CREATED. NO VERIFICATION IS ALLOWED
1354                            IF THE FLAG IS SET*/
1355   Uint32 cgcpOrderBlocked;
1356 
1357   /**
1358    * This structure describes
1359    *   the GCP Save protocol
1360    */
  struct GcpSave
  {
    Uint32 m_gci;
    Uint32 m_master_ref;
    /* Protocol state; the same enum type is reused for m_master.m_state. */
    enum State {
      GCP_SAVE_IDLE     = 0, // Idle
      GCP_SAVE_REQ      = 1, // REQ received
      GCP_SAVE_CONF     = 2, // REF/CONF sent
      GCP_SAVE_COPY_GCI = 3
    } m_state;

    /* Second, independent copy of the protocol state plus timing data.
       NOTE(review): presumably only meaningful while this node is master
       - confirm against the users of m_gcp_save.m_master. */
    struct {
      State m_state;
      Uint32 m_new_gci;
      Uint32 m_time_between_gcp;   /* Delay between global checkpoints */
      Uint64 m_start_time;
    } m_master;
  } m_gcp_save;
1379 
1380   /**
1381    * This structure describes the MicroGCP protocol
1382    */
  struct MicroGcp
  {
    /* Empty constructor: no scalar member is assigned here, so m_enabled,
       the GCI values and both m_state fields are indeterminate until code
       outside this struct sets them.
       NOTE(review): presumably done during block initialisation - confirm. */
    MicroGcp() { }
    bool m_enabled;
    Uint32 m_master_ref;

    /**
     * Lock (declared as an NdbSeqLock) that protects multiple parallel
     *   DIVERIFY (readers) from updates to gcp-state
     *   (e.g. GCP_PREPARE, GCP_COMMIT)
     */
    NdbSeqLock m_lock;
    Uint64 m_old_gci;
    Uint64 m_current_gci; // Currently active
    Uint64 m_new_gci;     // Currently being prepared...
    /* Protocol state; the same enum type is reused for m_master.m_state. */
    enum State {
      M_GCP_IDLE      = 0,
      M_GCP_PREPARE   = 1,
      M_GCP_COMMIT    = 2,
      M_GCP_COMMITTED = 3,
      M_GCP_COMPLETE  = 4
    } m_state;

    /* Second, independent copy of the protocol state plus timing data.
       NOTE(review): presumably only meaningful while this node is master
       - confirm against the users of m_micro_gcp.m_master. */
    struct {
      State m_state;
      Uint32 m_time_between_gcp;
      Uint64 m_new_gci;
      Uint64 m_start_time;
    } m_master;
  } m_micro_gcp;
1412 
  /* Monitoring data kept per protocol: one record for GCP save and one for
     micro GCP. The two records have the same fields except that the
     micro-GCP record stores its GCI as 64 bits.
     NOTE(review): m_counter / m_max_lag look like a tick counter and its
     allowed maximum - confirm against the GCP monitor implementation. */
  struct GcpMonitor
  {
    struct
    {
      Uint32 m_gci;
      Uint32 m_counter;
      Uint32 m_max_lag;
    } m_gcp_save;

    struct
    {
      Uint64 m_gci;       // 64-bit, unlike m_gcp_save.m_gci
      Uint32 m_counter;
      Uint32 m_max_lag;
    } m_micro_gcp;
  } m_gcp_monitor;
1429 
1430   /*------------------------------------------------------------------------*/
1431   /*       THIS VARIABLE KEEPS TRACK OF THE STATE OF THIS NODE AS MASTER.   */
1432   /*------------------------------------------------------------------------*/
  enum MasterState {
    MASTER_IDLE = 0,            /* NODE IS NOT MASTER            */
    MASTER_ACTIVE = 1,          /* NODE IS ACTIVE AS MASTER      */
    MASTER_TAKE_OVER_GCP = 2    /* NODE IS TAKING OVER AS MASTER */
  };
  /* Current master state of this node (one of the values above). */
  MasterState cmasterState;
  /* NOTE(review): presumably the node id involved in an ongoing master
     take-over - confirm against the code that sets it. */
  Uint16      cmasterTakeOverNode;
1443 
1444   struct CopyGCIMaster {
CopyGCIMasterDbdih::CopyGCIMaster1445     CopyGCIMaster(){
1446       m_copyReason = CopyGCIReq::IDLE;
1447       for (Uint32 i = 0; i<WAIT_CNT; i++)
1448         m_waiting[i] = CopyGCIReq::IDLE;
1449     }
1450     /*------------------------------------------------------------------------*/
1451     /*       THIS STATE VARIABLE IS USED TO INDICATE IF COPYING OF RESTART    */
1452     /*       INFO WAS STARTED BY A LOCAL CHECKPOINT OR AS PART OF A SYSTEM    */
1453     /*       RESTART.                                                         */
1454     /*------------------------------------------------------------------------*/
1455     CopyGCIReq::CopyReason m_copyReason;
1456 
1457     /*------------------------------------------------------------------------*/
1458     /*       COPYING RESTART INFO CAN BE STARTED BY LOCAL CHECKPOINTS AND BY  */
1459     /*       GLOBAL CHECKPOINTS. WE CAN HOWEVER ONLY HANDLE TWO SUCH COPY AT  */
1460     /*       THE TIME. THUS WE HAVE TO KEEP WAIT INFORMATION IN THIS VARIABLE.*/
1461     /*------------------------------------------------------------------------*/
1462     STATIC_CONST( WAIT_CNT = 2 );
1463     CopyGCIReq::CopyReason m_waiting[WAIT_CNT];
1464   } c_copyGCIMaster;
1465 
1466   struct CopyGCISlave {
CopyGCISlaveDbdih::CopyGCISlave1467     CopyGCISlave(){ m_copyReason = CopyGCIReq::IDLE; m_expectedNextWord = 0;}
1468     /*------------------------------------------------------------------------*/
1469     /*       THIS STATE VARIABLE IS USED TO INDICATE IF COPYING OF RESTART    */
1470     /*       INFO WAS STARTED BY A LOCAL CHECKPOINT OR AS PART OF A SYSTEM    */
1471     /*       RESTART. THIS VARIABLE IS USED BY THE NODE THAT RECEIVES         */
1472     /*       COPY_GCI_REQ.                                                    */
1473     /*------------------------------------------------------------------------*/
1474     Uint32 m_senderData;
1475     BlockReference m_senderRef;
1476     CopyGCIReq::CopyReason m_copyReason;
1477 
1478     Uint32 m_expectedNextWord;
1479   } c_copyGCISlave;
1480 
1481   /*------------------------------------------------------------------------*/
1482   /*       THIS VARIABLE IS USED TO KEEP TRACK OF THE STATE OF LOCAL        */
1483   /*       CHECKPOINTS.                                                     */
1484   /*------------------------------------------------------------------------*/
1485 public:
1486   enum LcpStatus {
1487     LCP_STATUS_IDLE        = 0,
1488     LCP_TCGET              = 1,  // Only master
1489     LCP_STATUS_ACTIVE      = 2,
1490     LCP_CALCULATE_KEEP_GCI = 4,  // Only master
1491     LCP_COPY_GCI           = 5,
1492     LCP_INIT_TABLES        = 6,
1493     LCP_TC_CLOPSIZE        = 7,  // Only master
1494     LCP_START_LCP_ROUND    = 8,
1495     LCP_TAB_COMPLETED      = 9,
1496     LCP_TAB_SAVED          = 10
1497   };
1498 private:
1499 
1500   struct LcpState {
LcpStateDbdih::LcpState1501     LcpState() {}
1502     LcpStatus lcpStatus;
1503     Uint32 lcpStatusUpdatedPlace;
1504 
1505     struct Save {
1506       LcpStatus m_status;
1507       Uint32 m_place;
1508     } m_saveState[10];
1509 
setLcpStatusDbdih::LcpState1510     void setLcpStatus(LcpStatus status, Uint32 line){
1511       for (Uint32 i = 9; i > 0; i--)
1512         m_saveState[i] = m_saveState[i-1];
1513       m_saveState[0].m_status = lcpStatus;
1514       m_saveState[0].m_place = lcpStatusUpdatedPlace;
1515 
1516       lcpStatus = status;
1517       lcpStatusUpdatedPlace = line;
1518     }
1519 
1520     Uint32 lcpStart;
1521     Uint32 lcpStopGcp;
1522     Uint32 keepGci;      /* USED TO CALCULATE THE GCI TO KEEP AFTER A LCP  */
1523     Uint32 oldestRestorableGci;
1524 
1525     Uint64 m_start_time; // When last LCP was started
1526     Uint64 m_lcp_time;   // How long last LCP took
1527     Uint32 m_lcp_trylock_timeout;
1528 
1529     struct CurrentFragment {
1530       Uint32 tableId;
1531       Uint32 fragmentId;
1532     } currentFragment;
1533 
1534     Uint32 noOfLcpFragRepOutstanding;
1535 
1536     /*------------------------------------------------------------------------*/
1537     /*       USED TO ENSURE THAT LCP'S ARE EXECUTED WITH CERTAIN TIMEINTERVALS*/
1538     /*       EVEN WHEN SYSTEM IS NOT DOING ANYTHING.                          */
1539     /*------------------------------------------------------------------------*/
1540     Uint32 ctimer;
1541     Uint32 ctcCounter;
1542     Uint32 clcpDelay;            /* MAX. 2^(CLCP_DELAY - 2) SEC BETWEEN LCP'S */
1543 
1544     /*------------------------------------------------------------------------*/
1545     /*       THIS STATE IS USED TO TELL IF THE FIRST LCP AFTER START/RESTART  */
1546     /*       HAS BEEN RUN.  AFTER A NODE RESTART THE NODE DOES NOT ENTER      */
1547     /*       STARTED STATE BEFORE THIS IS DONE.                               */
1548     /*------------------------------------------------------------------------*/
1549     bool immediateLcpStart;
1550     bool m_LCP_COMPLETE_REP_From_Master_Received;
1551     SignalCounter m_LCP_COMPLETE_REP_Counter_DIH;
1552     SignalCounter m_LCP_COMPLETE_REP_Counter_LQH;
1553     SignalCounter m_LAST_LCP_FRAG_ORD;
1554     NdbNodeBitmask m_participatingLQH;
1555     NdbNodeBitmask m_participatingDIH;
1556 
1557     Uint32 m_masterLcpDihRef;
1558     bool   m_MASTER_LCPREQ_Received;
1559     Uint32 m_MASTER_LCPREQ_FailedNodeId;
1560 
1561     Uint32 m_lastLCP_COMPLETE_REP_id;
1562     Uint32 m_lastLCP_COMPLETE_REP_ref;
1563   } c_lcpState;
1564 
  /*------------------------------------------------------------------------*/
  /*       THIS VARIABLE KEEPS TRACK OF HOW MANY TABLES ARE ACTIVATED.      */
  /*       WHEN STARTING A LOCAL CHECKPOINT WE SHOULD AVOID STARTING A      */
  /*       CHECKPOINT WHEN NO TABLES ARE ACTIVATED.                         */
  /*------------------------------------------------------------------------*/
1570   Uint32 cnoOfActiveTables;
1571 
1572   BlockReference cdictblockref;          /* DICTIONARY BLOCK REFERENCE */
1573   Uint32 cfailurenr;              /* EVERY TIME WHEN A NODE FAILURE IS REPORTED
1574                                     THIS NUMBER IS INCREMENTED. AT THE START OF
1575                                     THE SYSTEM THIS NUMBER MUST BE INITIATED TO
1576                                     ZERO */
1577 
1578   BlockReference clocallqhblockref;
1579   BlockReference clocaltcblockref;
1580   BlockReference cmasterdihref;
1581   Uint16 cownNodeId;
1582   BlockReference cndbStartReqBlockref;
1583   BlockReference cntrlblockref;
1584   Uint32 con_lineNodes;
1585   Uint32 creceivedfrag;
1586   Uint32 cremainingfrags;
1587   Uint32 cstarttype;
1588   Uint32 csystemnodes;
1589   Uint32 c_newest_restorable_gci;
1590   Uint32 c_set_initial_start_flag;
1591   Uint64 c_current_time; // Updated approx. every 10ms
1592 
1593 public:
1594   enum LcpMasterTakeOverState {
1595     LMTOS_IDLE = 0,
1596     LMTOS_WAIT_EMPTY_LCP = 1,   // Currently doing empty LCP
1597     LMTOS_WAIT_LCP_FRAG_REP = 2,// Currently waiting for outst. LCP_FRAG_REP
1598     LMTOS_INITIAL = 3,
1599     LMTOS_ALL_IDLE = 4,
1600     LMTOS_ALL_ACTIVE = 5,
1601     LMTOS_LCP_CONCLUDING = 6,
1602     LMTOS_COPY_ONGOING = 7
1603   };
1604 private:
1605   class MasterTakeOverState {
1606   public:
MasterTakeOverState()1607     MasterTakeOverState() {}
set(LcpMasterTakeOverState s,Uint32 line)1608     void set(LcpMasterTakeOverState s, Uint32 line) {
1609       state = s; updatePlace = line;
1610     }
1611 
1612     LcpMasterTakeOverState state;
1613     Uint32 updatePlace;
1614 
1615     Uint32 minTableId;
1616     Uint32 minFragId;
1617     Uint32 failedNodeId;
1618   } c_lcpMasterTakeOverState;
1619 
1620   Uint16 cmasterNodeId;
1621 
1622   struct NodeStartMasterRecord {
NodeStartMasterRecordDbdih::NodeStartMasterRecord1623     NodeStartMasterRecord() {}
1624     Uint32 startNode;
1625     Uint32 wait;
1626     Uint32 failNr;
1627     bool activeState;
1628     bool blockLcp;
1629     Uint32 blockGcp; // 0, 1=ordered, 2=effective
1630     Uint32 startInfoErrorCode;
1631     Uint32 m_outstandingGsn;
1632     MutexHandle2<DIH_FRAGMENT_INFO> m_fragmentInfoMutex;
1633   };
1634   NodeStartMasterRecord c_nodeStartMaster;
1635 
1636   struct NodeStartSlaveRecord {
NodeStartSlaveRecordDbdih::NodeStartSlaveRecord1637     NodeStartSlaveRecord() { nodeId = 0;}
1638 
1639     Uint32 nodeId;
1640   };
1641   NodeStartSlaveRecord c_nodeStartSlave;
1642 
1643   Uint32 cfirstAliveNode;
1644   Uint32 cfirstDeadNode;
1645   Uint32 cstartPhase;
1646   Uint32 cnoReplicas;
1647 
1648   bool cwaitLcpSr;
  /**
   * Available nodegroups (ids) (length == cnoOfNodeGroups)
   *   used to support nodegroups 2,4,6 (not just consecutive nodegroup ids)
   */
1653   Uint32 c_node_groups[MAX_NDB_NODES];
1654   Uint32 cnoOfNodeGroups;
1655   Uint32 crestartGci;      /* VALUE OF GCI WHEN SYSTEM RESTARTED OR STARTED */
1656 
1657   /**
1658    * Counter variables keeping track of the number of outstanding signals
1659    * for particular signals in various protocols.
1660    */
1661   SignalCounter c_COPY_GCIREQ_Counter;
1662   SignalCounter c_COPY_TABREQ_Counter;
1663   SignalCounter c_CREATE_FRAGREQ_Counter;
1664   SignalCounter c_DIH_SWITCH_REPLICA_REQ_Counter;
1665   SignalCounter c_EMPTY_LCP_REQ_Counter;
1666   SignalCounter c_GCP_COMMIT_Counter;
1667   SignalCounter c_GCP_PREPARE_Counter;
1668   SignalCounter c_GCP_SAVEREQ_Counter;
1669   SignalCounter c_SUB_GCP_COMPLETE_REP_Counter;
1670   SignalCounter c_INCL_NODEREQ_Counter;
1671   SignalCounter c_MASTER_GCPREQ_Counter;
1672   SignalCounter c_MASTER_LCPREQ_Counter;
1673   SignalCounter c_START_INFOREQ_Counter;
1674   SignalCounter c_START_RECREQ_Counter;
1675   SignalCounter c_STOP_ME_REQ_Counter;
1676   SignalCounter c_TC_CLOPSIZEREQ_Counter;
1677   SignalCounter c_TCGETOPSIZEREQ_Counter;
1678   SignalCounter c_START_LCP_REQ_Counter;
1679 
1680   bool   c_blockCommit;
1681   Uint32 c_blockCommitNo;
1682 
getBlockCommit() const1683   bool getBlockCommit() const {
1684     return c_blockCommit || cgckptflag;
1685   }
1686 
1687   /**
1688    * SwitchReplicaRecord - Should only be used by master
1689    */
1690   struct SwitchReplicaRecord {
SwitchReplicaRecordDbdih::SwitchReplicaRecord1691     SwitchReplicaRecord() {}
clearDbdih::SwitchReplicaRecord1692     void clear(){}
1693 
1694     Uint32 nodeId;
1695     Uint32 tableId;
1696     Uint32 fragNo;
1697   };
1698   SwitchReplicaRecord c_switchReplicas;
1699 
1700   struct StopPermProxyRecord {
StopPermProxyRecordDbdih::StopPermProxyRecord1701     StopPermProxyRecord() { clientRef = 0; }
1702 
1703     Uint32 clientData;
1704     BlockReference clientRef;
1705     BlockReference masterRef;
1706   };
1707 
1708   struct StopPermMasterRecord {
StopPermMasterRecordDbdih::StopPermMasterRecord1709     StopPermMasterRecord() { clientRef = 0;}
1710 
1711     Uint32 returnValue;
1712 
1713     Uint32 clientData;
1714     BlockReference clientRef;
1715   };
1716 
1717   StopPermProxyRecord c_stopPermProxy;
1718   StopPermMasterRecord c_stopPermMaster;
1719 
1720   void checkStopPermProxy(Signal*, NodeId failedNodeId);
1721   void checkStopPermMaster(Signal*, NodeRecordPtr failedNodePtr);
1722 
1723   void switchReplica(Signal*,
1724 		     Uint32 nodeId,
1725 		     Uint32 tableId,
1726 		     Uint32 fragNo);
1727 
1728   void switchReplicaReply(Signal*, NodeId nodeId);
1729 
1730   /**
1731    * Wait GCP (proxy)
1732    */
1733   struct WaitGCPProxyRecord {
WaitGCPProxyRecordDbdih::WaitGCPProxyRecord1734     WaitGCPProxyRecord() { clientRef = 0;}
1735 
1736     Uint32 clientData;
1737     BlockReference clientRef;
1738     BlockReference masterRef;
1739 
1740     union { Uint32 nextPool; Uint32 nextList; };
1741     Uint32 prevList;
1742   };
1743   typedef Ptr<WaitGCPProxyRecord> WaitGCPProxyPtr;
1744 
1745   /**
1746    * Wait GCP (master)
1747    */
1748   struct WaitGCPMasterRecord {
WaitGCPMasterRecordDbdih::WaitGCPMasterRecord1749     WaitGCPMasterRecord() { clientRef = 0;}
1750     Uint32 clientData;
1751     BlockReference clientRef;
1752 
1753     union { Uint32 nextPool; Uint32 nextList; };
1754     Uint32 prevList;
1755   };
1756   typedef Ptr<WaitGCPMasterRecord> WaitGCPMasterPtr;
1757 
1758   /**
1759    * Pool/list of WaitGCPProxyRecord record
1760    */
1761   ArrayPool<WaitGCPProxyRecord> waitGCPProxyPool;
1762   DLList<WaitGCPProxyRecord> c_waitGCPProxyList;
1763 
1764   /**
1765    * Pool/list of WaitGCPMasterRecord record
1766    */
1767   ArrayPool<WaitGCPMasterRecord> waitGCPMasterPool;
1768   typedef DLList<WaitGCPMasterRecord> WaitGCPList;
1769   WaitGCPList c_waitGCPMasterList;
1770   WaitGCPList c_waitEpochMasterList;
1771 
1772   void checkWaitGCPProxy(Signal*, NodeId failedNodeId);
1773   void checkWaitGCPMaster(Signal*, NodeId failedNodeId);
1774   void emptyWaitGCPMasterQueue(Signal*, Uint64, WaitGCPList&);
1775 
1776   /**
1777    * Stop me
1778    */
1779   struct StopMeRecord {
StopMeRecordDbdih::StopMeRecord1780     StopMeRecord() { clientRef = 0;}
1781 
1782     BlockReference clientRef;
1783     Uint32 clientData;
1784   };
1785   StopMeRecord c_stopMe;
1786 
1787   void checkStopMe(Signal *, NodeRecordPtr failedNodePtr);
1788 
1789 #define DIH_CDATA_SIZE 128
  /**
   * This variable must be at least the size of Sysfile::SYSFILE_SIZE32
   */
1793   Uint32 cdata[DIH_CDATA_SIZE];       /* TEMPORARY ARRAY VARIABLE */
1794 
1795   /**
1796    * Sys file data
1797    */
1798   Uint32 sysfileData[DIH_CDATA_SIZE];
1799   Uint32 sysfileDataToFile[DIH_CDATA_SIZE];
1800 
1801   /**
1802    * When a node comes up without filesystem
1803    *   we have to clear all LCP for that node
1804    */
1805   void invalidateNodeLCP(Signal *, Uint32 nodeId, Uint32 tableId);
1806   void invalidateNodeLCP(Signal *, Uint32 nodeId, TabRecordPtr);
1807 
1808   /**
1809    * Reply from nodeId
1810    */
1811   void startInfoReply(Signal *, Uint32 nodeId);
1812 
1813   void dump_replica_info();
1814   void dump_replica_info(const Fragmentstore*);
1815 
1816   // DIH specifics for execNODE_START_REP (sendDictUnlockOrd)
1817   void execNODE_START_REP(Signal* signal);
1818 
1819   /*
1820    * Lock master DICT.  Only current use is by starting node
1821    * during NR.  A pool of slave records is convenient anyway.
1822    */
1823   struct DictLockSlaveRecord {
1824     Uint32 lockPtr;
1825     Uint32 lockType;
1826     bool locked;
1827     Callback callback;
1828     Uint32 nextPool;
1829   };
1830 
1831   typedef Ptr<DictLockSlaveRecord> DictLockSlavePtr;
1832   ArrayPool<DictLockSlaveRecord> c_dictLockSlavePool;
1833 
1834   // slave
1835   void sendDictLockReq(Signal* signal, Uint32 lockType, Callback c);
1836   void recvDictLockConf(Signal* signal);
1837   void sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI);
1838 
1839   // NR
1840   Uint32 c_dictLockSlavePtrI_nodeRestart; // userPtr for NR
1841   void recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret);
1842 
1843   Uint32 c_error_7181_ref;
1844 
1845 #ifdef ERROR_INSERT
1846   void sendToRandomNodes(const char*, Signal*, SignalCounter*,
1847                          SendFunction,
1848                          Uint32 extra = RNIL,
1849                          Uint32 block = 0, Uint32 gsn = 0, Uint32 len = 0,
1850                          JobBufferLevel = JBB);
1851 #endif
1852 
1853   bool check_enable_micro_gcp(Signal* signal, bool broadcast);
1854 
1855   bool c_sr_wait_to;
1856   NdbNodeBitmask m_sr_nodes;
1857   NdbNodeBitmask m_to_nodes;
1858 
1859   void startme_copygci_conf(Signal*);
1860 
1861   /**
1862    * Local LCP state
1863    *   This struct is more or less a copy of lcp-state
1864    *   Reason for duplicating it is that
1865    *   - not to mess with current code
1866    *   - this one is "distributed", i.e maintained by *all* nodes,
1867    *     not like c_lcpState which mixed master/slave state in a "unnatural"
1868    *     way
1869    */
1870   struct LocalLCPState
1871   {
1872     enum State {
1873       LS_INITIAL = 0,
1874       LS_RUNNING = 1,
1875       LS_COMPLETE = 2
1876     } m_state;
1877 
1878     StartLcpReq m_start_lcp_req;
1879     Uint32 m_keep_gci; // Min GCI is needed to restore LCP
1880     Uint32 m_stop_gci; // This GCI needs to be complete before LCP is restorable
1881 
LocalLCPStateDbdih::LocalLCPState1882     LocalLCPState() { reset();}
1883 
1884     void reset();
1885     void init(const StartLcpReq*);
1886     void lcp_frag_rep(const LcpFragRep*);
1887     void lcp_complete_rep(Uint32 gci);
1888 
1889     /**
1890      * @param gci - current GCI being made restorable (COPY_GCI)
1891      */
1892     bool check_cut_log_tail(Uint32 gci) const;
1893   } m_local_lcp_state;
1894 
1895   // MT LQH
1896   Uint32 c_fragments_per_node;
dihGetInstanceKey(FragmentstorePtr tFragPtr)1897   Uint32 dihGetInstanceKey(FragmentstorePtr tFragPtr) {
1898     ndbrequire(!tFragPtr.isNull());
1899     Uint32 log_part_id = tFragPtr.p->m_log_part_id;
1900     Uint32 instanceKey = 1 + log_part_id % MAX_NDBMT_LQH_WORKERS;
1901     return instanceKey;
1902   }
1903   Uint32 dihGetInstanceKey(Uint32 tabId, Uint32 fragId);
1904 
1905   bool c_2pass_inr;
1906 };
1907 
1908 #if (DIH_CDATA_SIZE < _SYSFILE_SIZE32)
1909 #error "cdata is to small compared to Sysfile size"
1910 #endif
1911 
1912 #endif
1913 
1914