1 /* 2 Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License, version 2.0, 6 as published by the Free Software Foundation. 7 8 This program is also distributed with certain software (including 9 but not limited to OpenSSL) that is licensed under separate terms, 10 as designated in a particular file or component or in included license 11 documentation. The authors of MySQL hereby grant you an additional 12 permission to link the program and your derivative works with the 13 separately licensed software that they have included with MySQL. 14 15 This program is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License, version 2.0, for more details. 19 20 You should have received a copy of the GNU General Public License 21 along with this program; if not, write to the Free Software 22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 23 */ 24 25 #ifndef DBDIH_H 26 #define DBDIH_H 27 28 #include <ndb_limits.h> 29 #include <pc.hpp> 30 #include <SimulatedBlock.hpp> 31 #include "Sysfile.hpp" 32 #include <SignalCounter.hpp> 33 34 #include <signaldata/MasterLCP.hpp> 35 #include <signaldata/CopyGCIReq.hpp> 36 #include <blocks/mutexes.hpp> 37 #include <signaldata/LCP.hpp> 38 #include <NdbSeqLock.hpp> 39 40 #ifdef DBDIH_C 41 42 /*###################*/ 43 /* FILE SYSTEM FLAGS */ 44 /*###################*/ 45 #define ZLIST_OF_PAIRS 0 46 #define ZLIST_OF_PAIRS_SYNCH 16 47 #define ZOPEN_READ_WRITE 2 48 #define ZCREATE_READ_WRITE 0x302 49 #define ZCLOSE_NO_DELETE 0 50 #define ZCLOSE_DELETE 1 51 52 /*###############*/ 53 /* NODE STATES */ 54 /*###############*/ 55 #define ZIDLE 0 56 #define ZACTIVE 1 57 58 /*#########*/ 59 /* GENERAL */ 60 /*#########*/ 61 #define ZVAR_NO_WORD 1 62 #define ZVAR_NO_CRESTART_INFO 20 63 #define ZVAR_NO_CRESTART_INFO_TO_FILE 21 64 #define ZVALID 1 65 #define ZINVALID 2 66 67 /*###############*/ 68 /* ERROR CODES */ 69 /*###############*/ 70 // ------------------------------------------ 71 // Error Codes for Transactions (None sofar) 72 // ------------------------------------------ 73 #define ZUNDEFINED_FRAGMENT_ERROR 311 74 75 // -------------------------------------- 76 // Error Codes for Add Table 77 // -------------------------------------- 78 #define ZREPLERROR1 306 79 #define ZREPLERROR2 307 80 81 // -------------------------------------- 82 // Crash Codes 83 // -------------------------------------- 84 #define ZCOULD_NOT_OCCUR_ERROR 300 85 #define ZNOT_MASTER_ERROR 301 86 #define ZWRONG_FAILURE_NUMBER_ERROR 302 87 #define ZWRONG_START_NODE_ERROR 303 88 #define ZNO_REPLICA_FOUND_ERROR 304 89 90 // -------------------------------------- 91 // Codes from LQH 92 // -------------------------------------- 93 #define ZNODE_FAILURE_ERROR 400 94 95 96 /*#########*/ 97 /* PHASES */ 98 /*#########*/ 99 #define ZNDB_SPH1 1 100 #define ZNDB_SPH2 2 101 #define ZNDB_SPH3 3 102 #define ZNDB_SPH4 4 103 #define ZNDB_SPH5 5 104 #define ZNDB_SPH6 6 105 #define ZNDB_SPH7 7 106 #define ZNDB_SPH8 8 107 /*#########*/ 108 /* SIZES */ 109 /*#########*/ 110 #define ZPAGEREC 100 111 #define ZCREATE_REPLICA_FILE_SIZE 4 112 #define ZPROXY_MASTER_FILE_SIZE 10 113 #define ZPROXY_FILE_SIZE 10 114 #endif 115 116 class Dbdih: public SimulatedBlock { 117 #ifdef ERROR_INSERT 118 typedef void (Dbdih::* SendFunction)(Signal*, Uint32, Uint32); 119 #endif 120 public: 121 122 // Records 123 124 /*������������������������������������������������������������������������ 125 * THE API CONNECT RECORD IS THE SAME RECORD POINTER AS USED IN THE TC BLOCK 126 * 127 * IT KEEPS TRACK OF ALL THE OPERATIONS CONNECTED TO THIS TRANSACTION. 128 * IT IS LINKED INTO A QUEUE IN CASE THE GLOBAL CHECKPOINT IS CURRENTLY 129 * ONGOING */ 130 struct ApiConnectRecord { 131 Uint64 apiGci; 132 Uint32 senderData; 133 }; 134 typedef Ptr<ApiConnectRecord> ApiConnectRecordPtr; 135 136 /*############## CONNECT_RECORD ##############*/ 137 /*������������������������������������������������������������������������*/ 138 /* THE CONNECT RECORD IS CREATED WHEN A TRANSACTION HAS TO START. IT KEEPS 139 ALL INTERMEDIATE INFORMATION NECESSARY FOR THE TRANSACTION FROM THE 140 DISTRIBUTED MANAGER. THE RECORD KEEPS INFORMATION ABOUT THE 141 OPERATIONS THAT HAVE TO BE CARRIED OUT BY THE TRANSACTION AND 142 ALSO THE TRAIL OF NODES FOR EACH OPERATION IN THE THE 143 TRANSACTION. 144 */ 145 struct ConnectRecord { 146 enum ConnectState { 147 INUSE = 0, 148 FREE = 1, 149 STARTED = 2, 150 ALTER_TABLE = 3, 151 ALTER_TABLE_ABORT = 4, // "local" abort 152 ALTER_TABLE_REVERT = 5, 153 GET_TABINFO = 6 154 }; 155 union { 156 Uint32 nodes[MAX_REPLICAS]; 157 struct { 158 Uint32 m_changeMask; 159 Uint32 m_totalfragments; 160 Uint32 m_org_totalfragments; 161 Uint32 m_new_map_ptr_i; 162 } m_alter; 163 struct { 164 Uint32 m_map_ptr_i; 165 } m_create; 166 struct { 167 Uint32 m_requestInfo; 168 } m_get_tabinfo; 169 }; 170 ConnectState connectState; 171 Uint32 nextPool; 172 Uint32 table; 173 Uint32 userpointer; 174 BlockReference userblockref; 175 Callback m_callback; 176 }; 177 typedef Ptr<ConnectRecord> ConnectRecordPtr; 178 179 /*������������������������������������������������������������������������*/ 180 /* THESE RECORDS ARE USED WHEN CREATING REPLICAS DURING SYSTEM */ 181 /* RESTART. I NEED A COMPLEX DATA STRUCTURE DESCRIBING THE REPLICAS */ 182 /* I WILL TRY TO CREATE FOR EACH FRAGMENT. */ 183 /* */ 184 /* I STORE A REFERENCE TO THE FOUR POSSIBLE CREATE REPLICA RECORDS */ 185 /* IN A COMMON STORED VARIABLE. I ALLOW A MAXIMUM OF 4 REPLICAS TO */ 186 /* BE RESTARTED PER FRAGMENT. */ 187 /*������������������������������������������������������������������������*/ 188 struct CreateReplicaRecord { 189 Uint32 logStartGci[MAX_LOG_EXEC]; 190 Uint32 logStopGci[MAX_LOG_EXEC]; 191 Uint16 logNodeId[MAX_LOG_EXEC]; 192 Uint32 createLcpId; 193 194 Uint32 replicaRec; 195 Uint16 dataNodeId; 196 Uint16 lcpNo; 197 Uint16 noLogNodes; 198 }; 199 typedef Ptr<CreateReplicaRecord> CreateReplicaRecordPtr; 200 201 /*������������������������������������������������������������������������*/ 202 /* THIS RECORD CONTAINS A FILE DESCRIPTION. THERE ARE TWO */ 203 /* FILES PER TABLE TO RAISE SECURITY LEVEL AGAINST DISK CRASHES. */ 204 /*������������������������������������������������������������������������*/ 205 struct FileRecord { 206 enum FileStatus { 207 CLOSED = 0, 208 CRASHED = 1, 209 OPEN = 2 210 }; 211 enum FileType { 212 TABLE_FILE = 0, 213 GCP_FILE = 1 214 }; 215 enum ReqStatus { 216 IDLE = 0, 217 CREATING_GCP = 1, 218 OPENING_GCP = 2, 219 OPENING_COPY_GCI = 3, 220 WRITING_COPY_GCI = 4, 221 CREATING_COPY_GCI = 5, 222 OPENING_TABLE = 6, 223 READING_GCP = 7, 224 READING_TABLE = 8, 225 WRITE_INIT_GCP = 9, 226 TABLE_CREATE = 10, 227 TABLE_WRITE = 11, 228 TABLE_CLOSE = 12, 229 CLOSING_GCP = 13, 230 CLOSING_TABLE_CRASH = 14, 231 CLOSING_TABLE_SR = 15, 232 CLOSING_GCP_CRASH = 16, 233 TABLE_OPEN_FOR_DELETE = 17, 234 TABLE_CLOSE_DELETE = 18 235 }; 236 Uint32 fileName[4]; 237 Uint32 fileRef; 238 FileStatus fileStatus; 239 FileType fileType; 240 Uint32 nextFile; 241 ReqStatus reqStatus; 242 Uint32 tabRef; 243 }; 244 typedef Ptr<FileRecord> FileRecordPtr; 245 246 /*������������������������������������������������������������������������*/ 247 /* THIS RECORD KEEPS THE STORAGE AND DECISIONS INFORMATION OF A FRAGMENT */ 248 /* AND ITS REPLICAS. IF FRAGMENT HAS MORE THAN ONE BACK UP */ 249 /* REPLICA THEN A LIST OF MORE NODES IS ATTACHED TO THIS RECORD. */ 250 /* EACH RECORD IN MORE LIST HAS INFORMATION ABOUT ONE BACKUP. THIS RECORD */ 251 /* ALSO HAVE THE STATUS OF THE FRAGMENT. */ 252 /*������������������������������������������������������������������������*/ 253 /* */ 254 /* FRAGMENTSTORE RECORD ALIGNED TO BE 64 BYTES */ 255 /*������������������������������������������������������������������������*/ 256 struct Fragmentstore { 257 Uint16 activeNodes[MAX_REPLICAS]; 258 Uint32 preferredPrimary; 259 260 Uint32 oldStoredReplicas; /* "DEAD" STORED REPLICAS */ 261 Uint32 storedReplicas; /* "ALIVE" STORED REPLICAS */ 262 Uint32 nextFragmentChunk; 263 264 Uint32 m_log_part_id; 265 266 Uint8 distributionKey; 267 Uint8 fragReplicas; 268 Uint8 noOldStoredReplicas; /* NUMBER OF "DEAD" STORED REPLICAS */ 269 Uint8 noStoredReplicas; /* NUMBER OF "ALIVE" STORED REPLICAS*/ 270 Uint8 noLcpReplicas; ///< No of replicas remaining to be LCP:ed 271 }; 272 typedef Ptr<Fragmentstore> FragmentstorePtr; 273 274 /*########### PAGE RECORD ############*/ 275 /*��������������������������������������������������������������������*/ 276 /* THIS RECORD KEEPS INFORMATION ABOUT NODE GROUPS. */ 277 /*��������������������������������������������������������������������*/ 278 struct NodeGroupRecord { 279 Uint32 nodesInGroup[MAX_REPLICAS + 1]; 280 Uint32 nextReplicaNode; 281 Uint32 nodeCount; 282 Uint32 activeTakeOver; // Which node... 283 Uint32 m_next_log_part; 284 Uint32 nodegroupIndex; 285 Uint32 m_ref_count; 286 }; 287 typedef Ptr<NodeGroupRecord> NodeGroupRecordPtr; 288 /*��������������������������������������������������������������������*/ 289 /* THIS RECORD KEEPS INFORMATION ABOUT NODES. */ 290 /*��������������������������������������������������������������������*/ 291 /* RECORD ALIGNED TO BE 64 BYTES. */ 292 /*��������������������������������������������������������������������*/ 293 enum NodefailHandlingStep { 294 NF_REMOVE_NODE_FROM_TABLE = 1, 295 NF_GCP_TAKE_OVER = 2, 296 NF_LCP_TAKE_OVER = 4 297 }; 298 299 struct NodeRecord { 300 NodeRecord(); 301 302 enum NodeStatus { 303 NOT_IN_CLUSTER = 0, 304 ALIVE = 1, 305 STARTING = 2, 306 DIED_NOW = 3, 307 DYING = 4, 308 DEAD = 5 309 }; 310 311 struct FragmentCheckpointInfo { 312 Uint32 tableId; 313 Uint32 fragId; 314 Uint32 replicaPtr; 315 }; 316 317 Sysfile::ActiveStatus activeStatus; 318 319 NodeStatus nodeStatus; 320 bool useInTransactions; 321 bool allowNodeStart; 322 bool m_inclDihLcp; 323 Uint8 copyCompleted; // 0 = NO :-), 1 = YES, 2 = yes, first WAITING 324 325 FragmentCheckpointInfo startedChkpt[2]; 326 FragmentCheckpointInfo queuedChkpt[2]; 327 328 Bitmask<1> m_nodefailSteps; 329 Uint32 activeTabptr; 330 Uint32 nextNode; 331 Uint32 nodeGroup; 332 333 SignalCounter m_NF_COMPLETE_REP; 334 335 Uint8 dbtcFailCompleted; 336 Uint8 dblqhFailCompleted; 337 Uint8 dbdihFailCompleted; 338 Uint8 dbdictFailCompleted; 339 Uint8 recNODE_FAILREP; 340 341 Uint8 noOfQueuedChkpt; 342 Uint8 noOfStartedChkpt; 343 344 MasterLCPConf::State lcpStateAtTakeOver; 345 Uint32 m_remove_node_from_table_lcp_id; 346 }; 347 typedef Ptr<NodeRecord> NodeRecordPtr; 348 /**********************************************************************/ 349 /* THIS RECORD KEEPS THE INFORMATION ABOUT A TABLE AND ITS FRAGMENTS */ 350 /**********************************************************************/ 351 struct PageRecord { 352 Uint32 word[2048]; 353 /* 8 KBYTE PAGE*/ 354 Uint32 nextfreepage; 355 }; 356 typedef Ptr<PageRecord> PageRecordPtr; 357 358 /************ REPLICA RECORD *************/ 359 /**********************************************************************/ 360 /* THIS RECORD KEEPS THE INFORMATION ABOUT A REPLICA OF A FRAGMENT */ 361 /**********************************************************************/ 362 struct ReplicaRecord { 363 /* -------------------------------------------------------------------- */ 364 /* THE GLOBAL CHECKPOINT IDENTITY WHEN THIS REPLICA WAS CREATED. */ 365 /* THERE IS ONE INDEX PER REPLICA. A REPLICA INDEX IS CREATED WHEN ANODE*/ 366 /* CRASH OCCURS. */ 367 /* -------------------------------------------------------------------- */ 368 Uint32 createGci[8]; 369 /* -------------------------------------------------------------------- */ 370 /* THE LAST GLOBAL CHECKPOINT IDENTITY WHICH HAS BEEN SAVED ON DISK. */ 371 /* THIS VARIABLE IS ONLY VALID FOR REPLICAS WHICH HAVE "DIED". A REPLICA*/ 372 /* "DIES" EITHER WHEN THE NODE CRASHES THAT KEPT THE REPLICA OR BY BEING*/ 373 /* STOPPED IN A CONTROLLED MANNER. */ 374 /* THERE IS ONE INDEX PER REPLICA. A REPLICA INDEX IS CREATED WHEN ANODE*/ 375 /* CRASH OCCURS. */ 376 /* -------------------------------------------------------------------- */ 377 Uint32 replicaLastGci[8]; 378 /* -------------------------------------------------------------------- */ 379 /* THE LOCAL CHECKPOINT IDENTITY OF A LOCAL CHECKPOINT. */ 380 /* -------------------------------------------------------------------- */ 381 Uint32 lcpId[MAX_LCP_STORED]; 382 /* -------------------------------------------------------------------- */ 383 /* THIS VARIABLE KEEPS TRACK OF THE MAXIMUM GLOBAL CHECKPOINT COMPLETED */ 384 /* FOR EACH OF THE LOCAL CHECKPOINTS IN THIS FRAGMENT REPLICA. */ 385 /* -------------------------------------------------------------------- */ 386 Uint32 maxGciCompleted[MAX_LCP_STORED]; 387 /* -------------------------------------------------------------------- */ 388 /* THIS VARIABLE KEEPS TRACK OF THE MINIMUM GLOBAL CHECKPOINT STARTEDFOR*/ 389 /* EACH OF THE LOCAL CHECKPOINTS IN THIS FRAGMENT REPLICA. */ 390 /* -------------------------------------------------------------------- */ 391 Uint32 maxGciStarted[MAX_LCP_STORED]; 392 /* -------------------------------------------------------------------- */ 393 /* THE GLOBAL CHECKPOINT IDENTITY WHEN THE TABLE WAS CREATED. */ 394 /* -------------------------------------------------------------------- */ 395 Uint32 initialGci; 396 397 /* -------------------------------------------------------------------- */ 398 /* THE REFERENCE TO THE NEXT REPLICA. EITHER IT REFERS TO THE NEXT IN */ 399 /* THE FREE LIST OR IT REFERS TO THE NEXT IN A LIST OF REPLICAS ON A */ 400 /* FRAGMENT. */ 401 /* -------------------------------------------------------------------- */ 402 Uint32 nextReplica; 403 404 /* -------------------------------------------------------------------- */ 405 /* THE NODE ID WHERE THIS REPLICA IS STORED. */ 406 /* -------------------------------------------------------------------- */ 407 Uint16 procNode; 408 409 /* -------------------------------------------------------------------- */ 410 /* The last local checkpoint id started or queued on this replica. */ 411 /* -------------------------------------------------------------------- */ 412 union { 413 Uint32 lcpIdStarted; // Started or queued 414 Uint32 m_restorable_gci; 415 }; 416 417 /* -------------------------------------------------------------------- */ 418 /* THIS VARIABLE SPECIFIES WHAT THE STATUS OF THE LOCAL CHECKPOINT IS.IT*/ 419 /* CAN EITHER BE VALID OR INVALID. AT CREATION OF A FRAGMENT REPLICA ALL*/ 420 /* LCP'S ARE INVALID. ALSO IF IF INDEX >= NO_LCP THEN THELOCALCHECKPOINT*/ 421 /* IS ALWAYS INVALID. IF THE LCP BEFORE THE NEXT_LCP HAS LCP_ID THAT */ 422 /* DIFFERS FROM THE LATEST LCP_ID STARTED THEN THE NEXT_LCP IS ALSO */ 423 /* INVALID */ 424 /* -------------------------------------------------------------------- */ 425 Uint8 lcpStatus[MAX_LCP_STORED]; 426 427 /* -------------------------------------------------------------------- */ 428 /* THE NEXT LOCAL CHECKPOINT TO EXECUTE IN THIS FRAGMENT REPLICA. */ 429 /* -------------------------------------------------------------------- */ 430 Uint8 nextLcp; 431 432 /* -------------------------------------------------------------------- */ 433 /* THE NUMBER OF CRASHED REPLICAS IN THIS REPLICAS SO FAR. */ 434 /* -------------------------------------------------------------------- */ 435 Uint8 noCrashedReplicas; 436 437 /** 438 * Is a LCP currently ongoing on fragment 439 */ 440 Uint8 lcpOngoingFlag; 441 }; 442 typedef Ptr<ReplicaRecord> ReplicaRecordPtr; 443 444 /************************************************************************* 445 * TAB_DESCRIPTOR IS A DESCRIPTOR OF THE LOCATION OF THE FRAGMENTS BELONGING 446 * TO THE TABLE.THE INFORMATION ABOUT FRAGMENTS OF A TABLE ARE STORED IN 447 * CHUNKS OF FRAGMENTSTORE RECORDS. 448 * THIS RECORD ALSO HAS THE NECESSARY INFORMATION TO LOCATE A FRAGMENT AND 449 * TO LOCATE A FRAGMENT AND TO TRANSLATE A KEY OF A TUPLE TO THE FRAGMENT IT 450 * BELONGS 451 */ 452 struct TabRecord 453 { TabRecordDbdih::TabRecord454 TabRecord() { } 455 456 /** 457 * rw-lock that protects multiple parallel DIGETNODES (readers) from 458 * updates to fragmenation changes (e.g CREATE_FRAGREQ)... 459 * search for DIH_TAB_WRITE_LOCK 460 */ 461 NdbSeqLock m_lock; 462 463 /** 464 * State for copying table description into pages 465 */ 466 enum CopyStatus { 467 CS_IDLE, 468 CS_SR_PHASE1_READ_PAGES, 469 CS_SR_PHASE2_READ_TABLE, 470 CS_SR_PHASE3_COPY_TABLE, 471 CS_REMOVE_NODE, 472 CS_LCP_READ_TABLE, 473 CS_COPY_TAB_REQ, 474 CS_COPY_NODE_STATE, 475 CS_ADD_TABLE_MASTER, 476 CS_ADD_TABLE_SLAVE, 477 CS_INVALIDATE_NODE_LCP, 478 CS_ALTER_TABLE, 479 CS_COPY_TO_SAVE 480 ,CS_GET_TABINFO 481 }; 482 /** 483 * State for copying pages to disk 484 */ 485 enum UpdateState { 486 US_IDLE, 487 US_LOCAL_CHECKPOINT, 488 US_REMOVE_NODE, 489 US_COPY_TAB_REQ, 490 US_ADD_TABLE_MASTER, 491 US_ADD_TABLE_SLAVE, 492 US_INVALIDATE_NODE_LCP, 493 US_CALLBACK 494 }; 495 enum TabLcpStatus { 496 TLS_ACTIVE = 1, 497 TLS_WRITING_TO_FILE = 2, 498 TLS_COMPLETED = 3 499 }; 500 enum TabStatus { 501 TS_IDLE = 0, 502 TS_ACTIVE = 1, 503 TS_CREATING = 2, 504 TS_DROPPING = 3 505 }; 506 enum Method { 507 LINEAR_HASH = 0, 508 NOTDEFINED = 1, 509 NORMAL_HASH = 2, 510 USER_DEFINED = 3, 511 HASH_MAP = 4 512 }; 513 enum Storage { 514 ST_NOLOGGING = 0, // Table is not logged, but survives SR 515 ST_NORMAL = 1, // Normal table, logged and durable 516 ST_TEMPORARY = 2 // Table is lost after SR, not logged 517 }; 518 CopyStatus tabCopyStatus; 519 UpdateState tabUpdateState; 520 TabLcpStatus tabLcpStatus; 521 TabStatus tabStatus; 522 Method method; 523 Storage tabStorage; 524 525 Uint32 pageRef[32]; 526 //----------------------------------------------------------------------------- 527 // Each entry in this array contains a reference to 16 fragment records in a 528 // row. Thus finding the correct record is very quick provided the fragment id. 529 //----------------------------------------------------------------------------- 530 Uint32 startFid[MAX_NDB_NODES * MAX_FRAG_PER_NODE / NO_OF_FRAGS_PER_CHUNK]; 531 532 Uint32 tabFile[2]; 533 Uint32 connectrec; 534 union { 535 Uint32 hashpointer; 536 Uint32 m_new_map_ptr_i; 537 }; 538 union { 539 Uint32 mask; 540 Uint32 m_map_ptr_i; 541 }; 542 Uint32 noOfWords; 543 Uint32 schemaVersion; 544 Uint32 tabRemoveNode; 545 Uint32 totalfragments; 546 Uint32 noOfFragChunks; 547 Uint32 m_scan_count[2]; 548 Uint32 m_scan_reorg_flag; 549 Uint32 tabErrorCode; 550 struct { 551 Uint32 tabUserRef; 552 Uint32 tabUserPtr; 553 } m_dropTab; 554 555 Uint8 kvalue; 556 Uint8 noOfBackups; 557 Uint8 noPages; 558 Uint16 tableType; 559 Uint16 primaryTableId; 560 561 // set in local protocol during prepare until commit 562 Uint32 schemaTransId; 563 }; 564 typedef Ptr<TabRecord> TabRecordPtr; 565 566 /***************************************************************************/ 567 /* THIS RECORD IS USED TO KEEP TRACK OF TAKE OVER AND STARTING A NODE. */ 568 /* WE KEEP IT IN A RECORD TO ENABLE IT TO BE PARALLELISED IN THE FUTURE. */ 569 /**************************************************************************/ 570 struct TakeOverRecord { 571 572 /** 573 * States possible on slave (starting node) 574 */ 575 enum ToSlaveStatus { 576 TO_SLAVE_IDLE = 0 577 ,TO_START_FRAGMENTS = 1 // Finding LCP for each fragment 578 ,TO_RUN_REDO = 2 // Waiting for local LQH to run REDO 579 ,TO_START_TO = 3 // Waiting for master (START_TOREQ) 580 ,TO_SELECTING_NEXT = 4 // Selecting next fragment to copy 581 ,TO_PREPARE_COPY = 5 // Waiting for local LQH (PREPARE_COPYREQ) 582 ,TO_UPDATE_BEFORE_STORED = 6 // Waiting on master (UPDATE_TOREQ) 583 ,TO_CREATE_FRAG_STORED = 7 // Waiting for all (CREATE_FRAGREQ stored) 584 ,TO_UPDATE_AFTER_STORED = 8 // Waiting for master (UPDATE_TOREQ) 585 ,TO_COPY_FRAG = 9 // Waiting for copy node (COPY_FRAGREQ) 586 ,TO_COPY_ACTIVE = 10 // Waiting for local LQH (COPY_ACTIVEREQ) 587 ,TO_UPDATE_BEFORE_COMMIT = 11// Waiting for master (UPDATE_TOREQ) 588 ,TO_CREATE_FRAG_COMMIT = 12 // Waiting for all (CREATE_FRAGREQ commit) 589 ,TO_UPDATE_AFTER_COMMIT = 13 // Waiting for master (UPDATE_TOREQ) 590 591 ,TO_START_LOGGING = 14 // Enabling logging on all fragments 592 ,TO_SL_COPY_ACTIVE = 15 // Start logging: Copy active (local) 593 ,TO_SL_CREATE_FRAG = 16 // Start logging: Create Frag (dist) 594 ,TO_END_TO = 17 // Waiting for master (EBND_TOREQ) 595 }; 596 597 /** 598 * States possible on master 599 */ 600 enum ToMasterStatus { 601 TO_MASTER_IDLE = 0 602 ,TO_MUTEX_BEFORE_STORED = 1 // Waiting for lock 603 ,TO_MUTEX_BEFORE_LOCKED = 2 // Lock held 604 ,TO_AFTER_STORED = 3 // No lock, but NGPtr reservation 605 ,TO_MUTEX_BEFORE_COMMIT = 4 // Waiting for lock 606 ,TO_MUTEX_BEFORE_SWITCH_REPLICA = 5 // Waiting for switch replica lock 607 ,TO_MUTEX_AFTER_SWITCH_REPLICA = 6 608 ,TO_WAIT_LCP = 7 // No locks, waiting for LCP 609 }; 610 611 Uint32 m_flags; // 612 Uint32 m_senderRef; // Who requested START_COPYREQ 613 Uint32 m_senderData; // Data of sender 614 615 Uint32 restorableGci; // Which GCI can be restore "locally" by node 616 Uint32 startGci; 617 Uint32 maxPage; 618 Uint32 toCopyNode; 619 Uint32 toCurrentFragid; 620 Uint32 toCurrentReplica; 621 Uint32 toCurrentTabref; 622 Uint32 toFailedNode; 623 Uint32 toStartingNode; 624 Uint64 toStartTime; 625 ToSlaveStatus toSlaveStatus; 626 ToMasterStatus toMasterStatus; 627 628 MutexHandle2<DIH_SWITCH_PRIMARY_MUTEX> m_switchPrimaryMutexHandle; 629 MutexHandle2<DIH_FRAGMENT_INFO> m_fragmentInfoMutex; 630 631 Uint32 nextList; 632 union { 633 Uint32 prevList; 634 Uint32 nextPool; 635 }; 636 }; 637 typedef Ptr<TakeOverRecord> TakeOverRecordPtr; 638 getParam(const char * param,Uint32 * retVal)639 virtual bool getParam(const char * param, Uint32 * retVal) { 640 if (param && strcmp(param, "ActiveMutexes") == 0) 641 { 642 if (retVal) 643 { 644 * retVal = 5 + MAX_NDB_NODES; 645 } 646 return true; 647 } 648 return false; 649 } 650 651 public: 652 Dbdih(Block_context& ctx); 653 virtual ~Dbdih(); 654 655 struct RWFragment { 656 Uint32 pageIndex; 657 Uint32 wordIndex; 658 Uint32 fragId; 659 TabRecordPtr rwfTabPtr; 660 PageRecordPtr rwfPageptr; 661 Uint32 totalfragments; 662 }; 663 struct CopyTableNode { 664 Uint32 pageIndex; 665 Uint32 wordIndex; 666 Uint32 noOfWords; 667 TabRecordPtr ctnTabPtr; 668 PageRecordPtr ctnPageptr; 669 }; 670 671 private: 672 friend class SimulatedBlock; 673 BLOCK_DEFINES(Dbdih); 674 675 void execDUMP_STATE_ORD(Signal *); 676 void execNDB_TAMPER(Signal *); 677 void execDEBUG_SIG(Signal *); 678 void execEMPTY_LCP_CONF(Signal *); 679 void execEMPTY_LCP_REP(Signal*); 680 void execMASTER_GCPREF(Signal *); 681 void execMASTER_GCPREQ(Signal *); 682 void execMASTER_GCPCONF(Signal *); 683 void execMASTER_LCPREF(Signal *); 684 void execMASTER_LCPREQ(Signal *); 685 void execMASTER_LCPCONF(Signal *); 686 void execNF_COMPLETEREP(Signal *); 687 void execSTART_PERMREQ(Signal *); 688 void execSTART_PERMCONF(Signal *); 689 void execSTART_PERMREF(Signal *); 690 void execINCL_NODEREQ(Signal *); 691 void execINCL_NODECONF(Signal *); 692 693 void execSTART_TOREQ(Signal *); 694 void execSTART_TOREF(Signal *); 695 void execSTART_TOCONF(Signal*); 696 697 void execEND_TOREQ(Signal *); 698 void execEND_TOREF(Signal *); 699 void execEND_TOCONF(Signal*); 700 701 void execUPDATE_TOREQ(Signal* signal); 702 void execUPDATE_TOREF(Signal* signal); 703 void execUPDATE_TOCONF(Signal* signal); 704 705 void execSTART_MEREQ(Signal *); 706 void execSTART_MECONF(Signal *); 707 void execSTART_MEREF(Signal *); 708 void execSTART_COPYREQ(Signal *); 709 void execSTART_COPYCONF(Signal *); 710 void execSTART_COPYREF(Signal *); 711 void execCREATE_FRAGREQ(Signal *); 712 void execCREATE_FRAGCONF(Signal *); 713 void execDIVERIFYREQ(Signal *); 714 void execGCP_SAVEREQ(Signal *); 715 void execGCP_SAVECONF(Signal *); 716 void execGCP_PREPARECONF(Signal *); 717 void execGCP_PREPARE(Signal *); 718 void execGCP_NODEFINISH(Signal *); 719 void execGCP_COMMIT(Signal *); 720 void execSUB_GCP_COMPLETE_REP(Signal *); 721 void execSUB_GCP_COMPLETE_ACK(Signal *); 722 void execDIHNDBTAMPER(Signal *); 723 void execCONTINUEB(Signal *); 724 void execCOPY_GCIREQ(Signal *); 725 void execCOPY_GCICONF(Signal *); 726 void execCOPY_TABREQ(Signal *); 727 void execCOPY_TABCONF(Signal *); 728 void execTCGETOPSIZECONF(Signal *); 729 void execTC_CLOPSIZECONF(Signal *); 730 731 void execDIH_GET_TABINFO_REQ(Signal*); 732 733 int handle_invalid_lcp_no(const struct LcpFragRep*, ReplicaRecordPtr); 734 void execLCP_FRAG_REP(Signal *); 735 void execLCP_COMPLETE_REP(Signal *); 736 void execSTART_LCP_REQ(Signal *); 737 void execSTART_LCP_CONF(Signal *); 738 MutexHandle2<DIH_START_LCP_MUTEX> c_startLcpMutexHandle; 739 void startLcpMutex_locked(Signal* signal, Uint32, Uint32); 740 void startLcpMutex_unlocked(Signal* signal, Uint32, Uint32); 741 void lcpFragmentMutex_locked(Signal* signal, Uint32, Uint32); 742 void master_lcp_fragmentMutex_locked(Signal* signal, Uint32, Uint32); 743 744 MutexHandle2<DIH_SWITCH_PRIMARY_MUTEX> c_switchPrimaryMutexHandle; 745 void switchPrimaryMutex_locked(Signal* signal, Uint32, Uint32); 746 void switchPrimaryMutex_unlocked(Signal* signal, Uint32, Uint32); 747 void check_force_lcp(Ptr<TakeOverRecord> takeOverPtr); 748 749 void switch_primary_stop_node(Signal* signal, Uint32, Uint32); 750 751 void updateToReq_fragmentMutex_locked(Signal*, Uint32, Uint32); 752 753 MutexHandle2<DIH_FRAGMENT_INFO> c_fragmentInfoMutex_lcp; 754 755 void execBLOCK_COMMIT_ORD(Signal *); 756 void execUNBLOCK_COMMIT_ORD(Signal *); 757 758 void execDIH_SWITCH_REPLICA_REQ(Signal *); 759 void execDIH_SWITCH_REPLICA_REF(Signal *); 760 void execDIH_SWITCH_REPLICA_CONF(Signal *); 761 762 void execSTOP_PERM_REQ(Signal *); 763 void execSTOP_PERM_REF(Signal *); 764 void execSTOP_PERM_CONF(Signal *); 765 766 void execSTOP_ME_REQ(Signal *); 767 void execSTOP_ME_REF(Signal *); 768 void execSTOP_ME_CONF(Signal *); 769 770 void execREAD_CONFIG_REQ(Signal *); 771 void execUNBLO_DICTCONF(Signal *); 772 void execCOPY_ACTIVECONF(Signal *); 773 void execTAB_COMMITREQ(Signal *); 774 void execNODE_FAILREP(Signal *); 775 void execCOPY_FRAGCONF(Signal *); 776 void execCOPY_FRAGREF(Signal *); 777 void execPREPARE_COPY_FRAG_REF(Signal*); 778 void execPREPARE_COPY_FRAG_CONF(Signal*); 779 void execDIADDTABREQ(Signal *); 780 void execDIGETNODESREQ(Signal *); 781 void execSTTOR(Signal *); 782 void execDIH_SCAN_TAB_REQ(Signal *); 783 void execDIH_SCAN_GET_NODES_REQ(Signal *); 784 void execDIH_SCAN_TAB_COMPLETE_REP(Signal*); 785 void execGCP_SAVEREF(Signal *); 786 void execGCP_TCFINISHED(Signal *); 787 void execGCP_TCFINISHED_sync_conf(Signal* signal, Uint32 cb, Uint32 err); 788 void execREAD_NODESCONF(Signal *); 789 void execNDB_STTOR(Signal *); 790 void execDICTSTARTCONF(Signal *); 791 void execNDB_STARTREQ(Signal *); 792 void execGETGCIREQ(Signal *); 793 void execDIH_RESTARTREQ(Signal *); 794 void execSTART_RECCONF(Signal *); 795 void execSTART_FRAGREF(Signal *); 796 void execSTART_FRAGCONF(Signal *); 797 void execADD_FRAGCONF(Signal *); 798 void execADD_FRAGREF(Signal *); 799 void execDROP_FRAG_REF(Signal *); 800 void execDROP_FRAG_CONF(Signal *); 801 void execFSOPENCONF(Signal *); 802 void execFSOPENREF(Signal *); 803 void execFSCLOSECONF(Signal *); 804 void execFSCLOSEREF(Signal *); 805 void execFSREADCONF(Signal *); 806 void execFSREADREF(Signal *); 807 void execFSWRITECONF(Signal *); 808 void execFSWRITEREF(Signal *); 809 void execCHECKNODEGROUPSREQ(Signal *); 810 void execSTART_INFOREQ(Signal*); 811 void execSTART_INFOREF(Signal*); 812 void execSTART_INFOCONF(Signal*); 813 void execWAIT_GCP_REQ(Signal* signal); 814 void execWAIT_GCP_REF(Signal* signal); 815 void execWAIT_GCP_CONF(Signal* signal); 816 817 void execPREP_DROP_TAB_REQ(Signal* signal); 818 void execDROP_TAB_REQ(Signal* signal); 819 820 void execALTER_TAB_REQ(Signal* signal); 821 822 void execCREATE_FRAGMENTATION_REQ(Signal*); 823 824 void waitDropTabWritingToFile(Signal *, TabRecordPtr tabPtr); 825 void checkDropTabComplete(Signal *, TabRecordPtr tabPtr); 826 827 void execDICT_LOCK_CONF(Signal* signal); 828 void execDICT_LOCK_REF(Signal* signal); 829 830 void execUPGRADE_PROTOCOL_ORD(Signal* signal); 831 832 void execCREATE_NODEGROUP_IMPL_REQ(Signal*); 833 void execDROP_NODEGROUP_IMPL_REQ(Signal*); 834 835 // Statement blocks 836 //------------------------------------ 837 // Methods that send signals 838 //------------------------------------ 839 void nullRoutine(Signal *, Uint32 nodeId, Uint32); 840 void sendCOPY_GCIREQ(Signal *, Uint32 nodeId, Uint32); 841 void sendDIH_SWITCH_REPLICA_REQ(Signal *, Uint32 nodeId, Uint32); 842 void sendEMPTY_LCP_REQ(Signal *, Uint32 nodeId, Uint32); 843 void sendEND_TOREQ(Signal *, Uint32 nodeId, Uint32); 844 void sendGCP_COMMIT(Signal *, Uint32 nodeId, Uint32); 845 void sendGCP_PREPARE(Signal *, Uint32 nodeId, Uint32); 846 void sendGCP_SAVEREQ(Signal *, Uint32 nodeId, Uint32); 847 void sendSUB_GCP_COMPLETE_REP(Signal*, Uint32 nodeId, Uint32); 848 void sendINCL_NODEREQ(Signal *, Uint32 nodeId, Uint32); 849 void sendMASTER_GCPREQ(Signal *, Uint32 nodeId, Uint32); 850 void sendMASTER_LCPREQ(Signal *, Uint32 nodeId, Uint32); 851 void sendMASTER_LCPCONF(Signal * signal); 852 void sendSTART_RECREQ(Signal *, Uint32 nodeId, Uint32); 853 void sendSTART_INFOREQ(Signal *, Uint32 nodeId, Uint32); 854 void sendSTART_TOREQ(Signal *, Uint32 nodeId, Uint32); 855 void sendSTOP_ME_REQ(Signal *, Uint32 nodeId, Uint32); 856 void sendTC_CLOPSIZEREQ(Signal *, Uint32 nodeId, Uint32); 857 void sendTCGETOPSIZEREQ(Signal *, Uint32 nodeId, Uint32); 858 void sendUPDATE_TOREQ(Signal *, Uint32 nodeId, Uint32); 859 void sendSTART_LCP_REQ(Signal *, Uint32 nodeId, Uint32); 860 861 void sendLCP_FRAG_ORD(Signal*, NodeRecord::FragmentCheckpointInfo info); 862 void sendLastLCP_FRAG_ORD(Signal *); 863 864 void sendCopyTable(Signal *, CopyTableNode* ctn, 865 BlockReference ref, Uint32 reqinfo); 866 void sendCreateFragReq(Signal *, 867 Uint32 startGci, 868 Uint32 storedType, 869 Uint32 takeOverPtr); 870 void sendDihfragreq(Signal *, 871 TabRecordPtr regTabPtr, 872 Uint32 fragId); 873 874 void sendStartTo(Signal* signal, TakeOverRecordPtr); 875 void sendUpdateTo(Signal* signal, TakeOverRecordPtr); 876 877 void sendStartFragreq(Signal *, 878 TabRecordPtr regTabPtr, 879 Uint32 fragId); 880 void sendAddFragreq(Signal *, 881 TabRecordPtr regTabPtr, 882 Uint32 fragId, 883 Uint32 lcpNo, 884 Uint32 param); 885 886 void sendAddFragreq(Signal*, ConnectRecordPtr, TabRecordPtr, Uint32 fragId); 887 void addTable_closeConf(Signal* signal, Uint32 tabPtrI); 888 void resetReplicaSr(TabRecordPtr tabPtr); 889 void resetReplicaLcp(ReplicaRecord * replicaP, Uint32 stopGci); 890 void resetReplica(Ptr<ReplicaRecord>); 891 892 //------------------------------------ 893 // Methods for LCP functionality 894 //------------------------------------ 895 void checkKeepGci(TabRecordPtr, Uint32, Fragmentstore*, Uint32); 896 void checkLcpStart(Signal *, Uint32 lineNo); 897 void checkStartMoreLcp(Signal *, Uint32 nodeId); 898 bool reportLcpCompletion(const struct LcpFragRep *); 899 void sendLCP_COMPLETE_REP(Signal *); 900 901 //------------------------------------ 902 // Methods for Delete Table Files 903 //------------------------------------ 904 void startDeleteFile(Signal* signal, TabRecordPtr tabPtr); 905 void openTableFileForDelete(Signal* signal, Uint32 fileIndex); 906 void tableOpenLab(Signal* signal, FileRecordPtr regFilePtr); 907 void tableDeleteLab(Signal* signal, FileRecordPtr regFilePtr); 908 909 //------------------------------------ 910 // File Record specific methods 911 //------------------------------------ 912 void closeFile(Signal *, FileRecordPtr regFilePtr); 913 void closeFileDelete(Signal *, FileRecordPtr regFilePtr); 914 void createFileRw(Signal *, FileRecordPtr regFilePtr); 915 void openFileRw(Signal *, FileRecordPtr regFilePtr); 916 void openFileRo(Signal *, FileRecordPtr regFilePtr); 917 void seizeFile(FileRecordPtr& regFilePtr); 918 void releaseFile(Uint32 fileIndex); 919 920 //------------------------------------ 921 // Methods called when completing file 922 // operation. 923 //------------------------------------ 924 void creatingGcpLab(Signal *, FileRecordPtr regFilePtr); 925 void openingGcpLab(Signal *, FileRecordPtr regFilePtr); 926 void openingTableLab(Signal *, FileRecordPtr regFilePtr); 927 void tableCreateLab(Signal *, FileRecordPtr regFilePtr); 928 void creatingGcpErrorLab(Signal *, FileRecordPtr regFilePtr); 929 void openingCopyGciErrorLab(Signal *, FileRecordPtr regFilePtr); 930 void creatingCopyGciErrorLab(Signal *, FileRecordPtr regFilePtr); 931 void openingGcpErrorLab(Signal *, FileRecordPtr regFilePtr); 932 void openingTableErrorLab(Signal *, FileRecordPtr regFilePtr); 933 void tableCreateErrorLab(Signal *, FileRecordPtr regFilePtr); 934 void closingGcpLab(Signal *, FileRecordPtr regFilePtr); 935 void closingGcpCrashLab(Signal *, FileRecordPtr regFilePtr); 936 void closingTableCrashLab(Signal *, FileRecordPtr regFilePtr); 937 void closingTableSrLab(Signal *, FileRecordPtr regFilePtr); 938 void tableCloseLab(Signal *, FileRecordPtr regFilePtr); 939 void tableCloseErrorLab(FileRecordPtr regFilePtr); 940 void readingGcpLab(Signal *, FileRecordPtr regFilePtr); 941 void readingTableLab(Signal *, FileRecordPtr regFilePtr); 942 void readingGcpErrorLab(Signal *, FileRecordPtr regFilePtr); 943 void readingTableErrorLab(Signal *, FileRecordPtr regFilePtr); 944 void writingCopyGciLab(Signal *, FileRecordPtr regFilePtr); 945 void writeInitGcpLab(Signal *, FileRecordPtr regFilePtr); 946 void tableWriteLab(Signal *, FileRecordPtr regFilePtr); 947 void writeInitGcpErrorLab(Signal *, FileRecordPtr regFilePtr); 948 949 950 void checkEscalation(); 951 void clearRestartInfoBits(Signal *); 952 void invalidateLcpInfoAfterSr(Signal*); 953 954 bool isMaster(); 955 bool isActiveMaster(); 956 957 void handleGcpStateInMaster(Signal *, NodeRecordPtr failedNodeptr); 958 void initRestartInfo(Signal*); 959 void initRestorableGciFiles(); 960 void makeNodeGroups(Uint32 nodeArray[]); 961 void add_nodegroup(NodeGroupRecordPtr); 962 void inc_ng_refcount(Uint32 ng); 963 void dec_ng_refcount(Uint32 ng); 964 965 void makePrnList(class ReadNodesConf * readNodes, Uint32 nodeArray[]); 966 void nodeResetStart(Signal* signal); 967 void releaseTabPages(Uint32 tableId); 968 void replication(Uint32 noOfReplicas, 969 NodeGroupRecordPtr NGPtr, 970 FragmentstorePtr regFragptr); 971 void sendDihRestartRef(Signal*); 972 void selectMasterCandidateAndSend(Signal *); 973 void setLcpActiveStatusEnd(Signal*); 974 void setLcpActiveStatusStart(Signal *); 975 void setNodeActiveStatus(); 976 void setNodeGroups(); 977 void setNodeInfo(Signal *); 978 void setNodeLcpActiveStatus(); 979 void setNodeRestartInfoBits(Signal*); 980 void startGcp(Signal *); 981 void startGcpMonitor(Signal*); 982 983 void readFragment(RWFragment* rf, FragmentstorePtr regFragptr); 984 Uint32 readPageWord(RWFragment* rf); 985 void readReplica(RWFragment* rf, ReplicaRecordPtr readReplicaPtr); 986 void readReplicas(RWFragment* rf, FragmentstorePtr regFragptr); 987 void readRestorableGci(Signal *, FileRecordPtr regFilePtr); 988 void readTabfile(Signal *, TabRecord* tab, FileRecordPtr regFilePtr); 989 void writeFragment(RWFragment* wf, FragmentstorePtr regFragptr); 990 void writePageWord(RWFragment* wf, Uint32 dataWord); 991 void writeReplicas(RWFragment* wf, Uint32 replicaStartIndex); 992 void writeRestorableGci(Signal *, FileRecordPtr regFilePtr); 993 void writeTabfile(Signal *, TabRecord* tab, FileRecordPtr regFilePtr); 994 void copyTabReq_complete(Signal* signal, TabRecordPtr tabPtr); 995 996 void gcpcommitreqLab(Signal *); 997 void copyGciLab(Signal *, CopyGCIReq::CopyReason reason); 998 void storeNewLcpIdLab(Signal *); 999 void startLcpRoundLoopLab(Signal *, Uint32 startTableId, Uint32 startFragId); 1000 1001 void nodeFailCompletedCheckLab(Signal*, NodeRecordPtr failedNodePtr); 1002 1003 /** 1004 * 1005 */ 1006 void setLocalNodefailHandling(Signal*, Uint32 failedNodeId, 1007 NodefailHandlingStep step); 1008 void checkLocalNodefailComplete(Signal*, Uint32 failedNodeId, 1009 NodefailHandlingStep step); 1010 1011 Callback m_sendSTTORRY; 1012 void sendSTTORRY(Signal*, Uint32 senderData = 0, Uint32 retVal = 0); 1013 void ndbsttorry10Lab(Signal *, Uint32 _line); 1014 void createMutexes(Signal* signal, Uint32 no); 1015 void createMutex_done(Signal* signal, Uint32 no, Uint32 retVal); 1016 void dumpGcpStop(); 1017 void crashSystemAtGcpStop(Signal *, bool); 1018 void sendFirstDictfragsreq(Signal *, TabRecordPtr regTabPtr); 1019 void addtabrefuseLab(Signal *, ConnectRecordPtr regConnectPtr, Uint32 errorCode); 1020 void GCP_SAVEhandling(Signal *, Uint32 nodeId); 1021 void packTableIntoPagesLab(Signal *, Uint32 tableId); 1022 void readPagesIntoTableLab(Signal *, Uint32 tableId); 1023 void readPagesIntoFragLab(Signal *, RWFragment* rf); 1024 void readTabDescriptionLab(Signal *, Uint32 tableId); 1025 void copyTableLab(Signal *, Uint32 tableId); 1026 void breakCopyTableLab(Signal *, 1027 TabRecordPtr regTabPtr, 1028 Uint32 nodeId); 1029 void checkAddfragCompletedLab(Signal *, 1030 TabRecordPtr regTabPtr, 1031 Uint32 fragId); 1032 void completeRestartLab(Signal *); 1033 void readTableFromPagesLab(Signal *, TabRecordPtr regTabPtr); 1034 void srPhase2ReadTableLab(Signal *, TabRecordPtr regTabPtr); 1035 void checkTcCounterLab(Signal *); 1036 void calculateKeepGciLab(Signal *, Uint32 tableId, Uint32 fragId); 1037 void tableUpdateLab(Signal *, TabRecordPtr regTabPtr); 1038 void checkLcpCompletedLab(Signal *); 1039 void initLcpLab(Signal *, Uint32 masterRef, Uint32 tableId); 1040 void startGcpLab(Signal *, Uint32 aWaitTime); 1041 void checkGcpStopLab(Signal *); 1042 void MASTER_GCPhandling(Signal *, Uint32 failedNodeId); 1043 void MASTER_LCPhandling(Signal *, Uint32 failedNodeId); 1044 void rnfTableNotReadyLab(Signal *, TabRecordPtr regTabPtr, Uint32 removeNodeId); 1045 void startLcpTakeOverLab(Signal *, Uint32 failedNodeId); 1046 1047 void startLcpMasterTakeOver(Signal *, Uint32 failedNodeId); 1048 void startGcpMasterTakeOver(Signal *, Uint32 failedNodeId); 1049 void checkGcpOutstanding(Signal*, Uint32 failedNodeId); 1050 1051 void checkEmptyLcpComplete(Signal *); 1052 void lcpBlockedLab(Signal *, Uint32, Uint32); 1053 void breakCheckTabCompletedLab(Signal *, TabRecordPtr regTabptr); 1054 void readGciFileLab(Signal *); 1055 void openingCopyGciSkipInitLab(Signal *, FileRecordPtr regFilePtr); 1056 void startLcpRoundLab(Signal *); 1057 void gcpBlockedLab(Signal *); 1058 void initialStartCompletedLab(Signal *); 1059 void allNodesLcpCompletedLab(Signal *); 1060 void nodeRestartPh2Lab(Signal *); 1061 void nodeRestartPh2Lab2(Signal *); 1062 void initGciFilesLab(Signal *); 1063 void dictStartConfLab(Signal *); 1064 void nodeDictStartConfLab(Signal *); 1065 void ndbStartReqLab(Signal *, BlockReference ref); 1066 void nodeRestartStartRecConfLab(Signal *); 1067 void dihCopyCompletedLab(Signal *); 1068 void release_connect(ConnectRecordPtr ptr); 1069 void copyTableNode(Signal *, 1070 CopyTableNode* ctn, 1071 NodeRecordPtr regNodePtr); 1072 void startFragment(Signal *, Uint32 tableId, Uint32 fragId); 1073 bool checkLcpAllTablesDoneInLqh(Uint32 from); 1074 1075 void lcpStateAtNodeFailureLab(Signal *, Uint32 nodeId); 1076 void copyNodeLab(Signal *, Uint32 tableId); 1077 void copyGciReqLab(Signal *); 1078 void allLab(Signal *, 1079 ConnectRecordPtr regConnectPtr, 1080 TabRecordPtr regTabPtr); 1081 void tableCopyNodeLab(Signal *, TabRecordPtr regTabPtr); 1082 1083 void removeNodeFromTables(Signal *, Uint32 tableId, Uint32 nodeId); 1084 void removeNodeFromTable(Signal *, Uint32 tableId, TabRecordPtr tabPtr); 1085 void removeNodeFromTablesComplete(Signal* signal, Uint32 nodeId); 1086 1087 void packFragIntoPagesLab(Signal *, RWFragment* wf); 1088 void startNextChkpt(Signal *); 1089 void failedNodeLcpHandling(Signal*, NodeRecordPtr failedNodePtr); 1090 void failedNodeSynchHandling(Signal *, NodeRecordPtr failedNodePtr); 1091 void checkCopyTab(Signal*, NodeRecordPtr failedNodePtr); 1092 1093 void initCommonData(); 1094 void initialiseRecordsLab(Signal *, Uint32 stepNo, Uint32, Uint32); 1095 1096 void findReplica(ReplicaRecordPtr& regReplicaPtr, 1097 Fragmentstore* fragPtrP, 1098 Uint32 nodeId, 1099 bool oldStoredReplicas = false); 1100 //------------------------------------ 1101 // Node failure handling methods 1102 //------------------------------------ 1103 void startRemoveFailedNode(Signal *, NodeRecordPtr failedNodePtr); 1104 void handleGcpTakeOver(Signal *, NodeRecordPtr failedNodePtr); 1105 void handleLcpTakeOver(Signal *, NodeRecordPtr failedNodePtr); 1106 void handleNewMaster(Signal *, NodeRecordPtr failedNodePtr); 1107 void handleTakeOver(Signal*, Ptr<TakeOverRecord>); 1108 void handleLcpMasterTakeOver(Signal *, Uint32 nodeId); 1109 1110 //------------------------------------ 1111 // Replica record specific methods 1112 //------------------------------------ 1113 Uint32 findLogInterval(ConstPtr<ReplicaRecord> regReplicaPtr, 1114 Uint32 startGci); 1115 void findMinGci(ReplicaRecordPtr fmgReplicaPtr, 1116 Uint32& keeGci, 1117 Uint32& oldestRestorableGci); 1118 bool findStartGci(ConstPtr<ReplicaRecord> fstReplicaPtr, 1119 Uint32 tfstStopGci, 1120 Uint32& tfstStartGci, 1121 Uint32& tfstLcp); 1122 void newCrashedReplica(ReplicaRecordPtr ncrReplicaPtr); 1123 void packCrashedReplicas(ReplicaRecordPtr pcrReplicaPtr); 1124 void releaseReplicas(Uint32 * replicaPtr); 1125 void removeOldCrashedReplicas(Uint32, Uint32, ReplicaRecordPtr rocReplicaPtr); 1126 void removeTooNewCrashedReplicas(ReplicaRecordPtr rtnReplicaPtr, Uint32 lastCompletedGCI); 1127 void mergeCrashedReplicas(ReplicaRecordPtr pcrReplicaPtr); 1128 void seizeReplicaRec(ReplicaRecordPtr& replicaPtr); 1129 1130 //------------------------------------ 1131 // Methods operating on a fragment and 1132 // its connected replicas and nodes. 1133 //------------------------------------ 1134 void allocStoredReplica(FragmentstorePtr regFragptr, 1135 ReplicaRecordPtr& newReplicaPtr, 1136 Uint32 nodeId); 1137 Uint32 extractNodeInfo(const Fragmentstore * fragPtr, Uint32 nodes[]); 1138 bool findBestLogNode(CreateReplicaRecord* createReplica, 1139 FragmentstorePtr regFragptr, 1140 Uint32 startGci, 1141 Uint32 stopGci, 1142 Uint32 logNode, 1143 Uint32& fblStopGci); 1144 bool findLogNodes(CreateReplicaRecord* createReplica, 1145 FragmentstorePtr regFragptr, 1146 Uint32 startGci, 1147 Uint32 stopGci); 1148 void initFragstore(FragmentstorePtr regFragptr); 1149 void insertBackup(FragmentstorePtr regFragptr, Uint32 nodeId); 1150 void insertfraginfo(FragmentstorePtr regFragptr, 1151 Uint32 noOfBackups, 1152 Uint32* nodeArray); 1153 void linkOldStoredReplica(FragmentstorePtr regFragptr, 1154 ReplicaRecordPtr replicaPtr); 1155 void linkStoredReplica(FragmentstorePtr regFragptr, 1156 ReplicaRecordPtr replicaPtr); 1157 void prepareReplicas(FragmentstorePtr regFragptr); 1158 void removeNodeFromStored(Uint32 nodeId, 1159 FragmentstorePtr regFragptr, 1160 ReplicaRecordPtr replicaPtr, 1161 bool temporary); 1162 void removeOldStoredReplica(FragmentstorePtr regFragptr, 1163 ReplicaRecordPtr replicaPtr); 1164 void removeStoredReplica(FragmentstorePtr regFragptr, 1165 ReplicaRecordPtr replicaPtr); 1166 void searchStoredReplicas(FragmentstorePtr regFragptr); 1167 bool setup_create_replica(FragmentstorePtr, CreateReplicaRecord*, 1168 ConstPtr<ReplicaRecord>); 1169 void updateNodeInfo(FragmentstorePtr regFragptr); 1170 1171 //------------------------------------ 1172 // Fragment allocation, deallocation and 1173 // find methods 1174 //------------------------------------ 1175 void allocFragments(Uint32 noOfFragments, TabRecordPtr regTabPtr); 1176 void releaseFragments(TabRecordPtr regTabPtr); 1177 void getFragstore(TabRecord *, Uint32 fragNo, FragmentstorePtr & ptr); 1178 void initialiseFragstore(); 1179 1180 void wait_old_scan(Signal*); 1181 Uint32 add_fragments_to_table(Ptr<TabRecord>, const Uint16 buf[]); 1182 Uint32 add_fragment_to_table(Ptr<TabRecord>, Uint32, Ptr<Fragmentstore>&); 1183 1184 void drop_fragments(Signal*, ConnectRecordPtr, Uint32 last); 1185 void release_fragment_from_table(Ptr<TabRecord>, Uint32 fragId); 1186 void send_alter_tab_ref(Signal*, Ptr<TabRecord>,Ptr<ConnectRecord>, Uint32); 1187 void send_alter_tab_conf(Signal*, Ptr<ConnectRecord>); 1188 void alter_table_writeTable_conf(Signal* signal, Uint32 ptrI, Uint32 err); 1189 void saveTableFile(Signal*, Ptr<ConnectRecord>, Ptr<TabRecord>, 1190 TabRecord::CopyStatus, Callback&); 1191 1192 //------------------------------------ 1193 // Page Record specific methods 1194 //------------------------------------ 1195 void allocpage(PageRecordPtr& regPagePtr); 1196 void releasePage(Uint32 pageIndex); 1197 1198 //------------------------------------ 1199 // Table Record specific methods 1200 //------------------------------------ 1201 void initTable(TabRecordPtr regTabPtr); 1202 void initTableFile(TabRecordPtr regTabPtr); 1203 void releaseTable(TabRecordPtr tabPtr); 1204 bool findTakeOver(Ptr<TakeOverRecord> & ptr, Uint32 failedNodeId); 1205 void handleTakeOverMaster(Signal *, Uint32 takeOverPtr); 1206 void handleTakeOverNewMaster(Signal *, Uint32 takeOverPtr); 1207 1208 //------------------------------------ 1209 // TakeOver Record specific methods 1210 //------------------------------------ 1211 void releaseTakeOver(TakeOverRecordPtr); 1212 void abortTakeOver(Signal*, TakeOverRecordPtr); 1213 bool anyActiveTakeOver(); 1214 void checkToCopy(); 1215 void checkToCopyCompleted(Signal *); 1216 bool checkToInterrupted(TakeOverRecordPtr& regTakeOverptr); 1217 Uint32 getStartNode(Uint32 takeOverPtr); 1218 1219 //------------------------------------ 1220 // Methods for take over functionality 1221 //------------------------------------ 1222 void changeNodeGroups(Uint32 startNode, Uint32 nodeTakenOver); 1223 void endTakeOver(Uint32 takeOverPtr); 1224 1225 void systemRestartTakeOverLab(Signal *); 1226 void startTakeOver(Signal *, 1227 Uint32 startNode, 1228 Uint32 toNode, 1229 const struct StartCopyReq*); 1230 void startNextCopyFragment(Signal *, Uint32 takeOverPtr); 1231 void toCopyFragLab(Signal *, Uint32 takeOverPtr); 1232 void toStartCopyFrag(Signal *, TakeOverRecordPtr); 1233 void startHsAddFragConfLab(Signal *); 1234 void prepareSendCreateFragReq(Signal *, Uint32 takeOverPtr); 1235 void toCopyCompletedLab(Signal *, TakeOverRecordPtr regTakeOverptr); 1236 void takeOverCompleted(Uint32 aNodeId); 1237 1238 //------------------------------------ 1239 // Node Record specific methods 1240 //------------------------------------ 1241 void checkStartTakeOver(Signal *); 1242 void insertAlive(NodeRecordPtr newNodePtr); 1243 void insertDeadNode(NodeRecordPtr removeNodePtr); 1244 void removeAlive(NodeRecordPtr removeNodePtr); 1245 void removeDeadNode(NodeRecordPtr removeNodePtr); 1246 1247 NodeRecord::NodeStatus getNodeStatus(Uint32 nodeId); 1248 void setNodeStatus(Uint32 nodeId, NodeRecord::NodeStatus); 1249 Sysfile::ActiveStatus getNodeActiveStatus(Uint32 nodeId); 1250 void setNodeActiveStatus(Uint32 nodeId, Sysfile::ActiveStatus newStatus); 1251 void setNodeLcpActiveStatus(Uint32 nodeId, bool newState); 1252 bool getNodeLcpActiveStatus(Uint32 nodeId); 1253 bool getAllowNodeStart(Uint32 nodeId); 1254 void setAllowNodeStart(Uint32 nodeId, bool newState); 1255 bool getNodeCopyCompleted(Uint32 nodeId); 1256 void setNodeCopyCompleted(Uint32 nodeId, bool newState); 1257 Uint32 getNodeGroup(Uint32 nodeId) const; 1258 bool checkNodeAlive(Uint32 nodeId); 1259 1260 void nr_start_fragments(Signal*, TakeOverRecordPtr); 1261 void nr_start_fragment(Signal*, TakeOverRecordPtr, ReplicaRecordPtr); 1262 void nr_run_redo(Signal*, TakeOverRecordPtr); 1263 void nr_start_logging(Signal*, TakeOverRecordPtr); 1264 1265 void getTabInfo(Signal*); 1266 void getTabInfo_send(Signal*, TabRecordPtr); 1267 void getTabInfo_sendComplete(Signal*, Uint32, Uint32); 1268 int getTabInfo_copyTableToSection(SegmentedSectionPtr & ptr, CopyTableNode); 1269 int getTabInfo_copySectionToPages(TabRecordPtr, SegmentedSectionPtr); 1270 1271 // Initialisation 1272 void initData(); 1273 void initRecords(); 1274 1275 // Variables to support record structures and their free lists 1276 1277 Uint32 capiConnectFileSize; 1278 1279 ConnectRecord *connectRecord; 1280 Uint32 cfirstconnect; 1281 Uint32 cconnectFileSize; 1282 1283 CreateReplicaRecord *createReplicaRecord; 1284 Uint32 cnoOfCreateReplicas; 1285 1286 FileRecord *fileRecord; 1287 Uint32 cfirstfreeFile; 1288 Uint32 cfileFileSize; 1289 1290 Fragmentstore *fragmentstore; 1291 Uint32 cfirstfragstore; 1292 Uint32 cfragstoreFileSize; 1293 RSS_OP_SNAPSHOT(cremainingfrags); 1294 1295 Uint32 c_nextNodeGroup; 1296 NodeGroupRecord *nodeGroupRecord; 1297 RSS_OP_SNAPSHOT(cnghash); 1298 1299 NodeRecord *nodeRecord; 1300 1301 PageRecord *pageRecord; 1302 Uint32 cfirstfreepage; 1303 Uint32 cpageFileSize; 1304 1305 ReplicaRecord *replicaRecord; 1306 Uint32 cfirstfreeReplica; 1307 Uint32 cnoFreeReplicaRec; 1308 Uint32 creplicaFileSize; 1309 RSS_OP_SNAPSHOT(cnoFreeReplicaRec); 1310 1311 TabRecord *tabRecord; 1312 Uint32 ctabFileSize; 1313 1314 ArrayPool<TakeOverRecord> c_takeOverPool; 1315 DLList<TakeOverRecord> c_activeTakeOverList; 1316 1317 /* 1318 2.4 C O M M O N S T O R E D V A R I A B L E S 1319 ---------------------------------------------------- 1320 */ 1321 struct DIVERIFY_queue 1322 { DIVERIFY_queueDbdih::DIVERIFY_queue1323 DIVERIFY_queue() { 1324 m_ref = 0; 1325 cfirstVerifyQueue = clastVerifyQueue = 0; 1326 apiConnectRecord = 0; 1327 m_empty_done = 1; 1328 } 1329 ApiConnectRecord *apiConnectRecord; 1330 Uint32 cfirstVerifyQueue; 1331 Uint32 clastVerifyQueue; 1332 Uint32 m_empty_done; 1333 Uint32 m_ref; 1334 }; 1335 1336 bool isEmpty(const DIVERIFY_queue&); 1337 void enqueue(DIVERIFY_queue&, Uint32 senderData, Uint64 gci); 1338 void dequeue(DIVERIFY_queue&, ApiConnectRecord &); 1339 void emptyverificbuffer(Signal *, Uint32 q, bool aContintueB); 1340 void emptyverificbuffer_check(Signal*, Uint32, Uint32); 1341 1342 DIVERIFY_queue c_diverify_queue[MAX_NDBMT_LQH_THREADS]; 1343 Uint32 c_diverify_queue_cnt; 1344 1345 /*------------------------------------------------------------------------*/ 1346 /* THIS VARIABLE KEEPS THE REFERENCES TO FILE RECORDS THAT DESCRIBE */ 1347 /* THE TWO FILES THAT ARE USED TO STORE THE VARIABLE CRESTART_INFO */ 1348 /* ON DISK. */ 1349 /*------------------------------------------------------------------------*/ 1350 Uint32 crestartInfoFile[2]; 1351 1352 bool cgckptflag; /* A FLAG WHICH IS SET WHILE A NEW GLOBAL CHECK 1353 POINT IS BEING CREATED. NO VERIFICATION IS ALLOWED 1354 IF THE FLAG IS SET*/ 1355 Uint32 cgcpOrderBlocked; 1356 1357 /** 1358 * This structure describes 1359 * the GCP Save protocol 1360 */ 1361 struct GcpSave 1362 { 1363 Uint32 m_gci; 1364 Uint32 m_master_ref; 1365 enum State { 1366 GCP_SAVE_IDLE = 0, // Idle 1367 GCP_SAVE_REQ = 1, // REQ received 1368 GCP_SAVE_CONF = 2, // REF/CONF sent 1369 GCP_SAVE_COPY_GCI = 3 1370 } m_state; 1371 1372 struct { 1373 State m_state; 1374 Uint32 m_new_gci; 1375 Uint32 m_time_between_gcp; /* Delay between global checkpoints */ 1376 Uint64 m_start_time; 1377 } m_master; 1378 } m_gcp_save; 1379 1380 /** 1381 * This structure describes the MicroGCP protocol 1382 */ 1383 struct MicroGcp 1384 { MicroGcpDbdih::MicroGcp1385 MicroGcp() { } 1386 bool m_enabled; 1387 Uint32 m_master_ref; 1388 1389 /** 1390 * rw-lock that protects multiple parallel DIVERIFY (readers) from 1391 * updates to gcp-state (e.g GCP_PREPARE, GCP_COMMIT) 1392 */ 1393 NdbSeqLock m_lock; 1394 Uint64 m_old_gci; 1395 Uint64 m_current_gci; // Currently active 1396 Uint64 m_new_gci; // Currently being prepared... 1397 enum State { 1398 M_GCP_IDLE = 0, 1399 M_GCP_PREPARE = 1, 1400 M_GCP_COMMIT = 2, 1401 M_GCP_COMMITTED = 3, 1402 M_GCP_COMPLETE = 4 1403 } m_state; 1404 1405 struct { 1406 State m_state; 1407 Uint32 m_time_between_gcp; 1408 Uint64 m_new_gci; 1409 Uint64 m_start_time; 1410 } m_master; 1411 } m_micro_gcp; 1412 1413 struct GcpMonitor 1414 { 1415 struct 1416 { 1417 Uint32 m_gci; 1418 Uint32 m_counter; 1419 Uint32 m_max_lag; 1420 } m_gcp_save; 1421 1422 struct 1423 { 1424 Uint64 m_gci; 1425 Uint32 m_counter; 1426 Uint32 m_max_lag; 1427 } m_micro_gcp; 1428 } m_gcp_monitor; 1429 1430 /*------------------------------------------------------------------------*/ 1431 /* THIS VARIABLE KEEPS TRACK OF THE STATE OF THIS NODE AS MASTER. */ 1432 /*------------------------------------------------------------------------*/ 1433 enum MasterState { 1434 MASTER_IDLE = 0, 1435 MASTER_ACTIVE = 1, 1436 MASTER_TAKE_OVER_GCP = 2 1437 }; 1438 MasterState cmasterState; 1439 Uint16 cmasterTakeOverNode; 1440 /* NODE IS NOT MASTER */ 1441 /* NODE IS ACTIVE AS MASTER */ 1442 /* NODE IS TAKING OVER AS MASTER */ 1443 1444 struct CopyGCIMaster { CopyGCIMasterDbdih::CopyGCIMaster1445 CopyGCIMaster(){ 1446 m_copyReason = CopyGCIReq::IDLE; 1447 for (Uint32 i = 0; i<WAIT_CNT; i++) 1448 m_waiting[i] = CopyGCIReq::IDLE; 1449 } 1450 /*------------------------------------------------------------------------*/ 1451 /* THIS STATE VARIABLE IS USED TO INDICATE IF COPYING OF RESTART */ 1452 /* INFO WAS STARTED BY A LOCAL CHECKPOINT OR AS PART OF A SYSTEM */ 1453 /* RESTART. */ 1454 /*------------------------------------------------------------------------*/ 1455 CopyGCIReq::CopyReason m_copyReason; 1456 1457 /*------------------------------------------------------------------------*/ 1458 /* COPYING RESTART INFO CAN BE STARTED BY LOCAL CHECKPOINTS AND BY */ 1459 /* GLOBAL CHECKPOINTS. WE CAN HOWEVER ONLY HANDLE TWO SUCH COPY AT */ 1460 /* THE TIME. THUS WE HAVE TO KEEP WAIT INFORMATION IN THIS VARIABLE.*/ 1461 /*------------------------------------------------------------------------*/ 1462 STATIC_CONST( WAIT_CNT = 2 ); 1463 CopyGCIReq::CopyReason m_waiting[WAIT_CNT]; 1464 } c_copyGCIMaster; 1465 1466 struct CopyGCISlave { CopyGCISlaveDbdih::CopyGCISlave1467 CopyGCISlave(){ m_copyReason = CopyGCIReq::IDLE; m_expectedNextWord = 0;} 1468 /*------------------------------------------------------------------------*/ 1469 /* THIS STATE VARIABLE IS USED TO INDICATE IF COPYING OF RESTART */ 1470 /* INFO WAS STARTED BY A LOCAL CHECKPOINT OR AS PART OF A SYSTEM */ 1471 /* RESTART. THIS VARIABLE IS USED BY THE NODE THAT RECEIVES */ 1472 /* COPY_GCI_REQ. */ 1473 /*------------------------------------------------------------------------*/ 1474 Uint32 m_senderData; 1475 BlockReference m_senderRef; 1476 CopyGCIReq::CopyReason m_copyReason; 1477 1478 Uint32 m_expectedNextWord; 1479 } c_copyGCISlave; 1480 1481 /*------------------------------------------------------------------------*/ 1482 /* THIS VARIABLE IS USED TO KEEP TRACK OF THE STATE OF LOCAL */ 1483 /* CHECKPOINTS. */ 1484 /*------------------------------------------------------------------------*/ 1485 public: 1486 enum LcpStatus { 1487 LCP_STATUS_IDLE = 0, 1488 LCP_TCGET = 1, // Only master 1489 LCP_STATUS_ACTIVE = 2, 1490 LCP_CALCULATE_KEEP_GCI = 4, // Only master 1491 LCP_COPY_GCI = 5, 1492 LCP_INIT_TABLES = 6, 1493 LCP_TC_CLOPSIZE = 7, // Only master 1494 LCP_START_LCP_ROUND = 8, 1495 LCP_TAB_COMPLETED = 9, 1496 LCP_TAB_SAVED = 10 1497 }; 1498 private: 1499 1500 struct LcpState { LcpStateDbdih::LcpState1501 LcpState() {} 1502 LcpStatus lcpStatus; 1503 Uint32 lcpStatusUpdatedPlace; 1504 1505 struct Save { 1506 LcpStatus m_status; 1507 Uint32 m_place; 1508 } m_saveState[10]; 1509 setLcpStatusDbdih::LcpState1510 void setLcpStatus(LcpStatus status, Uint32 line){ 1511 for (Uint32 i = 9; i > 0; i--) 1512 m_saveState[i] = m_saveState[i-1]; 1513 m_saveState[0].m_status = lcpStatus; 1514 m_saveState[0].m_place = lcpStatusUpdatedPlace; 1515 1516 lcpStatus = status; 1517 lcpStatusUpdatedPlace = line; 1518 } 1519 1520 Uint32 lcpStart; 1521 Uint32 lcpStopGcp; 1522 Uint32 keepGci; /* USED TO CALCULATE THE GCI TO KEEP AFTER A LCP */ 1523 Uint32 oldestRestorableGci; 1524 1525 Uint64 m_start_time; // When last LCP was started 1526 Uint64 m_lcp_time; // How long last LCP took 1527 Uint32 m_lcp_trylock_timeout; 1528 1529 struct CurrentFragment { 1530 Uint32 tableId; 1531 Uint32 fragmentId; 1532 } currentFragment; 1533 1534 Uint32 noOfLcpFragRepOutstanding; 1535 1536 /*------------------------------------------------------------------------*/ 1537 /* USED TO ENSURE THAT LCP'S ARE EXECUTED WITH CERTAIN TIMEINTERVALS*/ 1538 /* EVEN WHEN SYSTEM IS NOT DOING ANYTHING. */ 1539 /*------------------------------------------------------------------------*/ 1540 Uint32 ctimer; 1541 Uint32 ctcCounter; 1542 Uint32 clcpDelay; /* MAX. 2^(CLCP_DELAY - 2) SEC BETWEEN LCP'S */ 1543 1544 /*------------------------------------------------------------------------*/ 1545 /* THIS STATE IS USED TO TELL IF THE FIRST LCP AFTER START/RESTART */ 1546 /* HAS BEEN RUN. AFTER A NODE RESTART THE NODE DOES NOT ENTER */ 1547 /* STARTED STATE BEFORE THIS IS DONE. */ 1548 /*------------------------------------------------------------------------*/ 1549 bool immediateLcpStart; 1550 bool m_LCP_COMPLETE_REP_From_Master_Received; 1551 SignalCounter m_LCP_COMPLETE_REP_Counter_DIH; 1552 SignalCounter m_LCP_COMPLETE_REP_Counter_LQH; 1553 SignalCounter m_LAST_LCP_FRAG_ORD; 1554 NdbNodeBitmask m_participatingLQH; 1555 NdbNodeBitmask m_participatingDIH; 1556 1557 Uint32 m_masterLcpDihRef; 1558 bool m_MASTER_LCPREQ_Received; 1559 Uint32 m_MASTER_LCPREQ_FailedNodeId; 1560 1561 Uint32 m_lastLCP_COMPLETE_REP_id; 1562 Uint32 m_lastLCP_COMPLETE_REP_ref; 1563 } c_lcpState; 1564 1565 /*------------------------------------------------------------------------*/ 1566 /* THIS VARIABLE KEEPS TRACK OF HOW MANY TABLES ARE ACTIVATED WHEN */ 1567 /* STARTING A LOCAL CHECKPOINT WE SHOULD AVOID STARTING A CHECKPOINT*/ 1568 /* WHEN NO TABLES ARE ACTIVATED. */ 1569 /*------------------------------------------------------------------------*/ 1570 Uint32 cnoOfActiveTables; 1571 1572 BlockReference cdictblockref; /* DICTIONARY BLOCK REFERENCE */ 1573 Uint32 cfailurenr; /* EVERY TIME WHEN A NODE FAILURE IS REPORTED 1574 THIS NUMBER IS INCREMENTED. AT THE START OF 1575 THE SYSTEM THIS NUMBER MUST BE INITIATED TO 1576 ZERO */ 1577 1578 BlockReference clocallqhblockref; 1579 BlockReference clocaltcblockref; 1580 BlockReference cmasterdihref; 1581 Uint16 cownNodeId; 1582 BlockReference cndbStartReqBlockref; 1583 BlockReference cntrlblockref; 1584 Uint32 con_lineNodes; 1585 Uint32 creceivedfrag; 1586 Uint32 cremainingfrags; 1587 Uint32 cstarttype; 1588 Uint32 csystemnodes; 1589 Uint32 c_newest_restorable_gci; 1590 Uint32 c_set_initial_start_flag; 1591 Uint64 c_current_time; // Updated approx. every 10ms 1592 1593 public: 1594 enum LcpMasterTakeOverState { 1595 LMTOS_IDLE = 0, 1596 LMTOS_WAIT_EMPTY_LCP = 1, // Currently doing empty LCP 1597 LMTOS_WAIT_LCP_FRAG_REP = 2,// Currently waiting for outst. LCP_FRAG_REP 1598 LMTOS_INITIAL = 3, 1599 LMTOS_ALL_IDLE = 4, 1600 LMTOS_ALL_ACTIVE = 5, 1601 LMTOS_LCP_CONCLUDING = 6, 1602 LMTOS_COPY_ONGOING = 7 1603 }; 1604 private: 1605 class MasterTakeOverState { 1606 public: MasterTakeOverState()1607 MasterTakeOverState() {} set(LcpMasterTakeOverState s,Uint32 line)1608 void set(LcpMasterTakeOverState s, Uint32 line) { 1609 state = s; updatePlace = line; 1610 } 1611 1612 LcpMasterTakeOverState state; 1613 Uint32 updatePlace; 1614 1615 Uint32 minTableId; 1616 Uint32 minFragId; 1617 Uint32 failedNodeId; 1618 } c_lcpMasterTakeOverState; 1619 1620 Uint16 cmasterNodeId; 1621 1622 struct NodeStartMasterRecord { NodeStartMasterRecordDbdih::NodeStartMasterRecord1623 NodeStartMasterRecord() {} 1624 Uint32 startNode; 1625 Uint32 wait; 1626 Uint32 failNr; 1627 bool activeState; 1628 bool blockLcp; 1629 Uint32 blockGcp; // 0, 1=ordered, 2=effective 1630 Uint32 startInfoErrorCode; 1631 Uint32 m_outstandingGsn; 1632 MutexHandle2<DIH_FRAGMENT_INFO> m_fragmentInfoMutex; 1633 }; 1634 NodeStartMasterRecord c_nodeStartMaster; 1635 1636 struct NodeStartSlaveRecord { NodeStartSlaveRecordDbdih::NodeStartSlaveRecord1637 NodeStartSlaveRecord() { nodeId = 0;} 1638 1639 Uint32 nodeId; 1640 }; 1641 NodeStartSlaveRecord c_nodeStartSlave; 1642 1643 Uint32 cfirstAliveNode; 1644 Uint32 cfirstDeadNode; 1645 Uint32 cstartPhase; 1646 Uint32 cnoReplicas; 1647 1648 bool cwaitLcpSr; 1649 /** 1650 * Available nodegroups (ids) (length == cnoOfNodeGroups) 1651 * use to support nodegroups 2,4,6 (not just consequtive nodegroup ids) 1652 */ 1653 Uint32 c_node_groups[MAX_NDB_NODES]; 1654 Uint32 cnoOfNodeGroups; 1655 Uint32 crestartGci; /* VALUE OF GCI WHEN SYSTEM RESTARTED OR STARTED */ 1656 1657 /** 1658 * Counter variables keeping track of the number of outstanding signals 1659 * for particular signals in various protocols. 1660 */ 1661 SignalCounter c_COPY_GCIREQ_Counter; 1662 SignalCounter c_COPY_TABREQ_Counter; 1663 SignalCounter c_CREATE_FRAGREQ_Counter; 1664 SignalCounter c_DIH_SWITCH_REPLICA_REQ_Counter; 1665 SignalCounter c_EMPTY_LCP_REQ_Counter; 1666 SignalCounter c_GCP_COMMIT_Counter; 1667 SignalCounter c_GCP_PREPARE_Counter; 1668 SignalCounter c_GCP_SAVEREQ_Counter; 1669 SignalCounter c_SUB_GCP_COMPLETE_REP_Counter; 1670 SignalCounter c_INCL_NODEREQ_Counter; 1671 SignalCounter c_MASTER_GCPREQ_Counter; 1672 SignalCounter c_MASTER_LCPREQ_Counter; 1673 SignalCounter c_START_INFOREQ_Counter; 1674 SignalCounter c_START_RECREQ_Counter; 1675 SignalCounter c_STOP_ME_REQ_Counter; 1676 SignalCounter c_TC_CLOPSIZEREQ_Counter; 1677 SignalCounter c_TCGETOPSIZEREQ_Counter; 1678 SignalCounter c_START_LCP_REQ_Counter; 1679 1680 bool c_blockCommit; 1681 Uint32 c_blockCommitNo; 1682 getBlockCommit() const1683 bool getBlockCommit() const { 1684 return c_blockCommit || cgckptflag; 1685 } 1686 1687 /** 1688 * SwitchReplicaRecord - Should only be used by master 1689 */ 1690 struct SwitchReplicaRecord { SwitchReplicaRecordDbdih::SwitchReplicaRecord1691 SwitchReplicaRecord() {} clearDbdih::SwitchReplicaRecord1692 void clear(){} 1693 1694 Uint32 nodeId; 1695 Uint32 tableId; 1696 Uint32 fragNo; 1697 }; 1698 SwitchReplicaRecord c_switchReplicas; 1699 1700 struct StopPermProxyRecord { StopPermProxyRecordDbdih::StopPermProxyRecord1701 StopPermProxyRecord() { clientRef = 0; } 1702 1703 Uint32 clientData; 1704 BlockReference clientRef; 1705 BlockReference masterRef; 1706 }; 1707 1708 struct StopPermMasterRecord { StopPermMasterRecordDbdih::StopPermMasterRecord1709 StopPermMasterRecord() { clientRef = 0;} 1710 1711 Uint32 returnValue; 1712 1713 Uint32 clientData; 1714 BlockReference clientRef; 1715 }; 1716 1717 StopPermProxyRecord c_stopPermProxy; 1718 StopPermMasterRecord c_stopPermMaster; 1719 1720 void checkStopPermProxy(Signal*, NodeId failedNodeId); 1721 void checkStopPermMaster(Signal*, NodeRecordPtr failedNodePtr); 1722 1723 void switchReplica(Signal*, 1724 Uint32 nodeId, 1725 Uint32 tableId, 1726 Uint32 fragNo); 1727 1728 void switchReplicaReply(Signal*, NodeId nodeId); 1729 1730 /** 1731 * Wait GCP (proxy) 1732 */ 1733 struct WaitGCPProxyRecord { WaitGCPProxyRecordDbdih::WaitGCPProxyRecord1734 WaitGCPProxyRecord() { clientRef = 0;} 1735 1736 Uint32 clientData; 1737 BlockReference clientRef; 1738 BlockReference masterRef; 1739 1740 union { Uint32 nextPool; Uint32 nextList; }; 1741 Uint32 prevList; 1742 }; 1743 typedef Ptr<WaitGCPProxyRecord> WaitGCPProxyPtr; 1744 1745 /** 1746 * Wait GCP (master) 1747 */ 1748 struct WaitGCPMasterRecord { WaitGCPMasterRecordDbdih::WaitGCPMasterRecord1749 WaitGCPMasterRecord() { clientRef = 0;} 1750 Uint32 clientData; 1751 BlockReference clientRef; 1752 1753 union { Uint32 nextPool; Uint32 nextList; }; 1754 Uint32 prevList; 1755 }; 1756 typedef Ptr<WaitGCPMasterRecord> WaitGCPMasterPtr; 1757 1758 /** 1759 * Pool/list of WaitGCPProxyRecord record 1760 */ 1761 ArrayPool<WaitGCPProxyRecord> waitGCPProxyPool; 1762 DLList<WaitGCPProxyRecord> c_waitGCPProxyList; 1763 1764 /** 1765 * Pool/list of WaitGCPMasterRecord record 1766 */ 1767 ArrayPool<WaitGCPMasterRecord> waitGCPMasterPool; 1768 typedef DLList<WaitGCPMasterRecord> WaitGCPList; 1769 WaitGCPList c_waitGCPMasterList; 1770 WaitGCPList c_waitEpochMasterList; 1771 1772 void checkWaitGCPProxy(Signal*, NodeId failedNodeId); 1773 void checkWaitGCPMaster(Signal*, NodeId failedNodeId); 1774 void emptyWaitGCPMasterQueue(Signal*, Uint64, WaitGCPList&); 1775 1776 /** 1777 * Stop me 1778 */ 1779 struct StopMeRecord { StopMeRecordDbdih::StopMeRecord1780 StopMeRecord() { clientRef = 0;} 1781 1782 BlockReference clientRef; 1783 Uint32 clientData; 1784 }; 1785 StopMeRecord c_stopMe; 1786 1787 void checkStopMe(Signal *, NodeRecordPtr failedNodePtr); 1788 1789 #define DIH_CDATA_SIZE 128 1790 /** 1791 * This variable must be atleast the size of Sysfile::SYSFILE_SIZE32 1792 */ 1793 Uint32 cdata[DIH_CDATA_SIZE]; /* TEMPORARY ARRAY VARIABLE */ 1794 1795 /** 1796 * Sys file data 1797 */ 1798 Uint32 sysfileData[DIH_CDATA_SIZE]; 1799 Uint32 sysfileDataToFile[DIH_CDATA_SIZE]; 1800 1801 /** 1802 * When a node comes up without filesystem 1803 * we have to clear all LCP for that node 1804 */ 1805 void invalidateNodeLCP(Signal *, Uint32 nodeId, Uint32 tableId); 1806 void invalidateNodeLCP(Signal *, Uint32 nodeId, TabRecordPtr); 1807 1808 /** 1809 * Reply from nodeId 1810 */ 1811 void startInfoReply(Signal *, Uint32 nodeId); 1812 1813 void dump_replica_info(); 1814 void dump_replica_info(const Fragmentstore*); 1815 1816 // DIH specifics for execNODE_START_REP (sendDictUnlockOrd) 1817 void execNODE_START_REP(Signal* signal); 1818 1819 /* 1820 * Lock master DICT. Only current use is by starting node 1821 * during NR. A pool of slave records is convenient anyway. 1822 */ 1823 struct DictLockSlaveRecord { 1824 Uint32 lockPtr; 1825 Uint32 lockType; 1826 bool locked; 1827 Callback callback; 1828 Uint32 nextPool; 1829 }; 1830 1831 typedef Ptr<DictLockSlaveRecord> DictLockSlavePtr; 1832 ArrayPool<DictLockSlaveRecord> c_dictLockSlavePool; 1833 1834 // slave 1835 void sendDictLockReq(Signal* signal, Uint32 lockType, Callback c); 1836 void recvDictLockConf(Signal* signal); 1837 void sendDictUnlockOrd(Signal* signal, Uint32 lockSlavePtrI); 1838 1839 // NR 1840 Uint32 c_dictLockSlavePtrI_nodeRestart; // userPtr for NR 1841 void recvDictLockConf_nodeRestart(Signal* signal, Uint32 data, Uint32 ret); 1842 1843 Uint32 c_error_7181_ref; 1844 1845 #ifdef ERROR_INSERT 1846 void sendToRandomNodes(const char*, Signal*, SignalCounter*, 1847 SendFunction, 1848 Uint32 extra = RNIL, 1849 Uint32 block = 0, Uint32 gsn = 0, Uint32 len = 0, 1850 JobBufferLevel = JBB); 1851 #endif 1852 1853 bool check_enable_micro_gcp(Signal* signal, bool broadcast); 1854 1855 bool c_sr_wait_to; 1856 NdbNodeBitmask m_sr_nodes; 1857 NdbNodeBitmask m_to_nodes; 1858 1859 void startme_copygci_conf(Signal*); 1860 1861 /** 1862 * Local LCP state 1863 * This struct is more or less a copy of lcp-state 1864 * Reason for duplicating it is that 1865 * - not to mess with current code 1866 * - this one is "distributed", i.e maintained by *all* nodes, 1867 * not like c_lcpState which mixed master/slave state in a "unnatural" 1868 * way 1869 */ 1870 struct LocalLCPState 1871 { 1872 enum State { 1873 LS_INITIAL = 0, 1874 LS_RUNNING = 1, 1875 LS_COMPLETE = 2 1876 } m_state; 1877 1878 StartLcpReq m_start_lcp_req; 1879 Uint32 m_keep_gci; // Min GCI is needed to restore LCP 1880 Uint32 m_stop_gci; // This GCI needs to be complete before LCP is restorable 1881 LocalLCPStateDbdih::LocalLCPState1882 LocalLCPState() { reset();} 1883 1884 void reset(); 1885 void init(const StartLcpReq*); 1886 void lcp_frag_rep(const LcpFragRep*); 1887 void lcp_complete_rep(Uint32 gci); 1888 1889 /** 1890 * @param gci - current GCI being made restorable (COPY_GCI) 1891 */ 1892 bool check_cut_log_tail(Uint32 gci) const; 1893 } m_local_lcp_state; 1894 1895 // MT LQH 1896 Uint32 c_fragments_per_node; dihGetInstanceKey(FragmentstorePtr tFragPtr)1897 Uint32 dihGetInstanceKey(FragmentstorePtr tFragPtr) { 1898 ndbrequire(!tFragPtr.isNull()); 1899 Uint32 log_part_id = tFragPtr.p->m_log_part_id; 1900 Uint32 instanceKey = 1 + log_part_id % MAX_NDBMT_LQH_WORKERS; 1901 return instanceKey; 1902 } 1903 Uint32 dihGetInstanceKey(Uint32 tabId, Uint32 fragId); 1904 1905 bool c_2pass_inr; 1906 }; 1907 1908 #if (DIH_CDATA_SIZE < _SYSFILE_SIZE32) 1909 #error "cdata is to small compared to Sysfile size" 1910 #endif 1911 1912 #endif 1913 1914