1 /*
2 Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #ifndef DBLQH_H
26 #define DBLQH_H
27
28 #ifndef DBLQH_STATE_EXTRACT
29 #include <pc.hpp>
30 #include <ndb_limits.h>
31 #include <SimulatedBlock.hpp>
32 #include <SectionReader.hpp>
33 #include <IntrusiveList.hpp>
34 #include "ArrayPool.hpp"
35 #include <DLHashTable.hpp>
36
37 #include <NodeBitmask.hpp>
38 #include <signaldata/NodeRecoveryStatusRep.hpp>
39 #include <signaldata/LCP.hpp>
40 #include <signaldata/LqhTransConf.hpp>
41 #include <signaldata/CreateTab.hpp>
42 #include <signaldata/LqhFrag.hpp>
43 #include <signaldata/FsOpenReq.hpp>
44 #include <signaldata/DropTab.hpp>
45 #include <signaldata/CopyFrag.hpp>
46
47 // primary key is stored in TUP
48 #include "../dbtup/Dbtup.hpp"
49 #include "../dbacc/Dbacc.hpp"
50 #include "../dbtux/Dbtux.hpp"
51 #include "../backup/Backup.hpp"
52 #include "../restore.hpp"
53
54 #include "TransientPool.hpp"
55 #include "TransientSlotPool.hpp"
56
57 class Dbacc;
58 class Dbtup;
59 class Dbtux;
60 class Lgman;
61 #endif // DBLQH_STATE_EXTRACT
62
63 #define JAM_FILE_ID 450
64
65 #ifdef DBLQH_C
66 // Constants
67 /* ------------------------------------------------------------------------- */
68 /* CONSTANTS USED WHEN MASTER REQUESTS STATE OF COPY FRAGMENTS. */
/* Like every constant up to the matching #endif, these are only defined */
/* when DBLQH_C is defined (see the enclosing #ifdef DBLQH_C). */
69 /* ------------------------------------------------------------------------- */
70 #define ZCOPY_CLOSING 0
71 #define ZCOPY_ONGOING 1
72 #define ZCOPY_ACTIVATION 2
73 /* ------------------------------------------------------------------------- */
74 /* STATES FOR THE VARIABLE GCP_LOG_PART_STATE */
75 /* ------------------------------------------------------------------------- */
76 #define ZIDLE 0
77 #define ZWAIT_DISK 1
78 #define ZON_DISK 2
/* ZACTIVE has the same value (1) as ZWAIT_DISK, so the two cannot be told */
/* apart if they are ever stored in the same variable. */
/* NOTE(review): confirm that ZACTIVE really is a GCP_LOG_PART_STATE value. */
79 #define ZACTIVE 1
80 /* ------------------------------------------------------------------------- */
81 /* STATES FOR THE VARIABLE CSR_PHASES_STARTED */
/* ZSR_BOTH_PHASES_STARTED (3) equals ZSR_PHASE1_COMPLETED | */
/* ZSR_PHASE2_COMPLETED (1 | 2); presumably the values are combined as bit */
/* flags - TODO confirm against the users of CSR_PHASES_STARTED. */
82 /* ------------------------------------------------------------------------- */
83 #define ZSR_NO_PHASE_STARTED 0
84 #define ZSR_PHASE1_COMPLETED 1
85 #define ZSR_PHASE2_COMPLETED 2
86 #define ZSR_BOTH_PHASES_STARTED 3
87 /* ------------------------------------------------------------------------- */
88 /* THE NUMBER OF PAGES IN A MBYTE, THE TWO LOGARITHM OF THIS. */
89 /* THE NUMBER OF MBYTES IN A LOG FILE. */
90 /* THE MAX NUMBER OF PAGES READ/WRITTEN FROM/TO DISK DURING */
91 /* A WRITE OR READ. */
/* NOTE(review): the three constants directly below are dirty flags and a */
/* read-ahead size; the pages-per-Mbyte constants this banner describes */
/* are defined in the "CONSTANTS OF THE LOG PAGES" section that follows. */
92 /* ------------------------------------------------------------------------- */
93 #define ZNOT_DIRTY 0
94 #define ZDIRTY 1
95 #define ZREAD_AHEAD_SIZE 8
96 /* ------------------------------------------------------------------------- */
97 /* CONSTANTS OF THE LOG PAGES */
/* Two pairs here must be kept in sync: */
/*   ZPAGE_SIZE      == 1 << ZTWOLOG_PAGE_SIZE          (8192 == 2^13) */
/*   ZPAGES_IN_MBYTE == 1 << ZTWOLOG_NO_PAGES_IN_MBYTE  (32   == 2^5)  */
/* 32 pages of 8192 units per Mbyte implies a 4-byte unit, so sizes are */
/* presumably counted in 32-bit words - TODO confirm. */
98 /* ------------------------------------------------------------------------- */
99 #define ZPAGE_HEADER_SIZE 32
100 #define ZPAGE_SIZE 8192
101 #define ZPAGES_IN_MBYTE 32
102 #define ZTWOLOG_NO_PAGES_IN_MBYTE 5
103 #define ZTWOLOG_PAGE_SIZE 13
104 #define ZMAX_MM_BUFFER_SIZE 32 // Main memory window during log execution
105
106 #define ZMAX_PAGES_WRITTEN 8 // Max pages before writing to disk (=> config)
107 #define ZMIN_READ_BUFFER_SIZE 2 // Minimum number of pages to execute log
108 #define ZMIN_LOG_PAGES_OPERATION 10 // Minimum no of pages before stopping
109
/* Named positions of items kept in a log page; presumably word indexes */
/* into the page header (the highest one, 21, is below ZPAGE_HEADER_SIZE, */
/* which is 32) - TODO confirm. */
/* Position 9 has no name in this list. */
110 #define ZPOS_CHECKSUM 0
111 #define ZPOS_LOG_LAP 1
112 #define ZPOS_MAX_GCI_COMPLETED 2
113 #define ZPOS_MAX_GCI_STARTED 3
114 #define ZNEXT_PAGE 4
115 #define ZPREV_PAGE 5
116 #define ZPOS_VERSION 6
117 #define ZPOS_NO_LOG_FILES 7
118 #define ZCURR_PAGE_INDEX 8
119 #define ZLAST_LOG_PREP_REF 10
120 #define ZPOS_DIRTY 11
121 /* A number of debug items written in the page header of all log files */
122 #define ZPOS_LOG_TIMER 12
123 #define ZPOS_PAGE_I 13
124 #define ZPOS_PLACE_WRITTEN_FROM 14
125 #define ZPOS_PAGE_NO 15
126 #define ZPOS_PAGE_FILE_NO 16
127 #define ZPOS_WORD_WRITTEN 17
128 #define ZPOS_IN_WRITING 18
129 #define ZPOS_PREV_PAGE_NO 19
130 #define ZPOS_IN_FREE_LIST 20
131
132 /* Specify number of log parts used to enable use of more LQH threads */
133 #define ZPOS_NO_LOG_PARTS 21
134
135 /* ------------------------------------------------------------------------- */
136 /* CONSTANTS FOR THE VARIOUS REPLICA AND NODE TYPES. */
/* ZTC_NODE and ZLOG_NODE share the value 3, so code comparing against one */
/* of them also matches the other. */
137 /* ------------------------------------------------------------------------- */
138 #define ZPRIMARY_NODE 0
139 #define ZBACKUP_NODE 1
140 #define ZSTANDBY_NODE 2
141 #define ZTC_NODE 3
142 #define ZLOG_NODE 3
143 /* ------------------------------------------------------------------------- */
144 /* VARIOUS CONSTANTS USED AS FLAGS TO THE FILE MANAGER. */
/* Each *_SYNCH constant is its non-SYNCH counterpart plus 16 */
/* (0 + 16 and 2 + 16); presumably 16 is a "sync" bit added to the list */
/* format value - TODO confirm against the file system request signals. */
/* Several unrelated flags reuse the small values 0 and 1. */
145 /* ------------------------------------------------------------------------- */
146 #define ZVAR_NO_LOG_PAGE_WORD 1
147 #define ZLIST_OF_PAIRS 0
148 #define ZLIST_OF_PAIRS_SYNCH 16
149 #define ZARRAY_OF_PAGES 1
150 #define ZLIST_OF_MEM_PAGES 2
151 #define ZLIST_OF_MEM_PAGES_SYNCH 18
152 #define ZCLOSE_NO_DELETE 0
153 #define ZCLOSE_DELETE 1
154 #define ZPAGE_ZERO 0
155 /* ------------------------------------------------------------------------- */
156 /* THE FOLLOWING CONSTANTS ARE USED TO DESCRIBE THE TYPES OF */
157 /* LOG RECORDS, THE SIZE OF THE VARIOUS LOG RECORD TYPES AND */
158 /* THE POSITIONS WITHIN THOSE LOG RECORDS. */
159 /* ------------------------------------------------------------------------- */
160 /* ------------------------------------------------------------------------- */
161 /* THESE CONSTANTS DESCRIBE THE SIZES OF VARIOUS TYPES OF LOG RECORDS. */
162 /* NEXT_LOG_SIZE IS ACTUALLY ONE. THE REASON WE SET IT TO 2 IS TO */
163 /* SIMPLIFY THE CODE SINCE WE WOULD OTHERWISE HAVE TO USE A SPECIAL */
164 /* VERSION OF READ_LOGWORD WHEN READING THE LOG RECORD TYPE, */
165 /* SINCE NEXT MBYTE TYPE COULD BE THE VERY LAST WORD IN THE MBYTE. */
166 /* BY SETTING IT TO 2 WE ENSURE IT IS NEVER THE VERY LAST WORD */
167 /* IN THE MBYTE. */
/* The sizes are presumably counted in log words - TODO confirm. */
168 /* ------------------------------------------------------------------------- */
169 #define ZFD_HEADER_SIZE 3
170 #define ZFD_MBYTE_SIZE 3
171 #define ZLOG_HEAD_SIZE 8
172 #define ZNEXT_LOG_SIZE 2
173 #define ZABORT_LOG_SIZE 3
174 #define ZCOMMIT_LOG_SIZE 9
175 #define ZCOMPLETED_GCI_LOG_SIZE 2
176 /* ------------------------------------------------------------------------- */
177 /* THESE CONSTANTS DESCRIBE THE TYPE OF A LOG RECORD. */
178 /* THIS IS THE FIRST WORD OF A LOG RECORD. */
/* The type values are consecutive, 0 through 9. */
179 /* ------------------------------------------------------------------------- */
180 #define ZNEW_PREP_OP_TYPE 0
181 #define ZPREP_OP_TYPE 1
182 #define ZCOMMIT_TYPE 2
183 #define ZABORT_TYPE 3
184 #define ZFD_TYPE 4
185 #define ZFRAG_SPLIT_TYPE 5
186 #define ZNEXT_LOG_RECORD_TYPE 6
187 #define ZNEXT_MBYTE_TYPE 7
188 #define ZCOMPLETED_GCI_TYPE 8
189 #define ZINVALID_COMMIT_TYPE 9
190 /* ------------------------------------------------------------------------- */
191 /* THE POSITIONS OF LOGGED DATA IN A FILE DESCRIPTOR LOG RECORD HEADER.*/
192 /* ALSO THE MAXIMUM NUMBER OF FILE DESCRIPTORS IN A LOG RECORD. */
/* Three positions (0..2) are named, matching ZFD_HEADER_SIZE (3). */
/* NOTE(review): no "maximum number of file descriptors" constant is */
/* defined in this section, despite the banner. */
193 /* ------------------------------------------------------------------------- */
194 #define ZPOS_LOG_TYPE 0
195 #define ZPOS_NO_FD 1
196 #define ZPOS_FILE_NO 2
197 /* ------------------------------------------------------------------------- */
198 /* THE POSITIONS WITHIN A PREPARE LOG RECORD AND A NEW PREPARE */
199 /* LOG RECORD. */
/* Positions 2..7 are named here; the highest (7) is the last index below */
/* ZLOG_HEAD_SIZE (8). Positions 0 and 1 have no name in this section. */
200 /* ------------------------------------------------------------------------- */
201 #define ZPOS_HASH_VALUE 2
202 #define ZPOS_SCHEMA_VERSION 3
203 #define ZPOS_TRANS_TICKET 4
204 #define ZPOS_OP_TYPE 5
205 #define ZPOS_NO_ATTRINFO 6
206 #define ZPOS_NO_KEYINFO 7
207 /* ------------------------------------------------------------------------- */
208 /* THE POSITIONS WITHIN A COMMIT LOG RECORD. */
/* NOTE(review): ZPOS_COMMIT_STOP_PAGE_NO is 9, which is not below */
/* ZCOMMIT_LOG_SIZE (also 9); confirm that these positions still match the */
/* commit record layout actually written. */
209 /* ------------------------------------------------------------------------- */
210 #define ZPOS_COMMIT_TRANSID1 1
211 #define ZPOS_COMMIT_TRANSID2 2
212 #define ZPOS_COMMIT_GCI 3
213 #define ZPOS_COMMIT_TABLE_REF 4
214 #define ZPOS_COMMIT_FRAGID 5
215 #define ZPOS_COMMIT_FILE_NO 6
216 #define ZPOS_COMMIT_START_PAGE_NO 7
217 #define ZPOS_COMMIT_START_PAGE_INDEX 8
218 #define ZPOS_COMMIT_STOP_PAGE_NO 9
219 /* ------------------------------------------------------------------------- */
220 /* THE POSITIONS WITHIN A ABORT LOG RECORD. */
/* Positions 1 and 2, inside ZABORT_LOG_SIZE (3). */
221 /* ------------------------------------------------------------------------- */
222 #define ZPOS_ABORT_TRANSID1 1
223 #define ZPOS_ABORT_TRANSID2 2
224 /* ------------------------------------------------------------------------- */
225 /* THE POSITION WITHIN A COMPLETED GCI LOG RECORD. */
/* Position 1, inside ZCOMPLETED_GCI_LOG_SIZE (2). */
226 /* ------------------------------------------------------------------------- */
227 #define ZPOS_COMPLETED_GCI 1
228 /* ------------------------------------------------------------------------- */
229 /* THE POSITIONS WITHIN A NEW PREPARE LOG RECORD. */
/* These continue at 8 and 9, directly after the prepare record positions */
/* that end at ZPOS_NO_KEYINFO (7). */
230 /* ------------------------------------------------------------------------- */
231 #define ZPOS_NEW_PREP_FILE_NO 8
232 #define ZPOS_NEW_PREP_PAGE_REF 9
233
/* NOTE(review): these two have no banner of their own; the names suggest */
/* flags describing a log write - confirm where they are used. */
234 #define ZLAST_WRITE_IN_FILE 1
235 #define ZENFORCE_WRITE 2
236 /* ------------------------------------------------------------------------- */
237 /* CONSTANTS USED AS INPUT TO SUBROUTINE WRITE_LOG_PAGES AMONG OTHERS. */
238 /* ------------------------------------------------------------------------- */
239 #define ZNORMAL 0
240 #define ZINIT 1
241 /* ------------------------------------------------------------------------- */
242 /* CONSTANTS USED BY CONTINUEB TO DEDUCE WHICH CONTINUE SIGNAL IS TO */
243 /* BE EXECUTED AS A RESULT OF THIS CONTINUEB SIGNAL. */
/* Each code must be unique within this list. Two values need care when */
/* adding a new code: */
/*   - 16 is not assigned to any name here; */
/*   - 27 (ZDELAY_FS_OPEN) is only defined when ERROR_INSERT is defined, */
/*     but the list continues at 28 either way, so 27 is taken in both */
/*     builds. */
244 /* ------------------------------------------------------------------------- */
245 #define ZLOG_LQHKEYREQ 0
246 #define ZPACK_LQHKEYREQ 1
247 #define ZSEND_ATTRINFO 2
248 #define ZSR_GCI_LIMITS 3
249 #define ZSR_LOG_LIMITS 4
250 #define ZSEND_EXEC_CONF 5
251 #define ZEXEC_SR 6
252 #define ZSR_FOURTH_COMP 7
253 #define ZINIT_FOURTH 8
254 #define ZTIME_SUPERVISION 9
255 #define ZSR_PHASE3_START 10
256 #define ZLQH_TRANS_NEXT 11
257 #define ZLQH_RELEASE_AT_NODE_FAILURE 12
258 #define ZSCAN_TC_CONNECT 13
259 #define ZINITIALISE_RECORDS 14
260 #define ZINIT_GCP_REC 15
261 #define ZCHECK_LCP_STOP_BLOCKED 17
262 #define ZSCAN_MARKERS 18
263 #define ZOPERATION_EVENT_REP 19
264 #define ZDROP_TABLE_WAIT_USAGE 20
265 #define ZENABLE_EXPAND_CHECK 21
266 #define ZRETRY_TCKEYREF 22
267 #define ZWAIT_REORG_SUMA_FILTER_ENABLED 23
268 #define ZREBUILD_ORDERED_INDEXES 24
269 #define ZWAIT_READONLY 25
270 #define ZLCP_FRAG_WATCHDOG 26
271 #if defined ERROR_INSERT
272 #define ZDELAY_FS_OPEN 27
273 #endif
274 #define ZSTART_LOCAL_LCP 28
275 #define ZCHECK_SYSTEM_SCANS 29
276 #define ZSTART_QUEUED_SCAN 30
277 #define ZLQH_SHRINK_TRANSIENT_POOLS 31
278 #define ZLQH_TRANSIENT_POOL_STAT 32
279 #define ZPGMAN_PREP_LCP_ACTIVE_CHECK 33
280
281 /* ------------------------------------------------------------------------- */
282 /* NODE STATE DURING SYSTEM RESTART, VARIABLES CNODES_SR_STATE */
283 /* AND CNODES_EXEC_SR_STATE. */
/* The two states are 1 and 2; 0 has no name in this section. */
284 /* ------------------------------------------------------------------------- */
285 #define ZSTART_SR 1
286 #define ZEXEC_SR_COMPLETED 2
287 /* ------------------------------------------------------------------------- */
288 /* CONSTANTS USED BY NODE STATUS TO DEDUCE THE STATUS OF A NODE. */
289 /* ------------------------------------------------------------------------- */
290 #define ZNODE_UP 0
291 #define ZNODE_DOWN 1
292 /* ------------------------------------------------------------------------- */
293 /* START PHASES */
/* Phases 1-4 and 6 are named; there is no constant for phase 5. */
/* ZLAST_START_PHASE (255) is presumably an end marker rather than a real */
/* phase number - TODO confirm. */
294 /* ------------------------------------------------------------------------- */
295 #define ZLAST_START_PHASE 255
296 #define ZSTART_PHASE1 1
297 #define ZSTART_PHASE2 2
298 #define ZSTART_PHASE3 3
299 #define ZSTART_PHASE4 4
300 #define ZSTART_PHASE6 6
301 /* ------------------------------------------------------------------------- */
302 /* CONSTANTS USED BY SCAN AND COPY FRAGMENT PROCEDURES */
/* This group mixes unrelated value spaces, so equal numbers (for example */
/* ZSTORED_PROC_COPY and ZSCAN_FRAG_CLOSED, both 2) do not imply any */
/* relationship. Stored procedure id 1 has no name here. */
303 /* ------------------------------------------------------------------------- */
304 #define ZSTORED_PROC_SCAN 0
305 #define ZSTORED_PROC_COPY 2
306 #define ZDELETE_STORED_PROC_ID 3
307 #define ZWRITE_LOCK 1
308 #define ZSCAN_FRAG_CLOSED 2
309 #define ZNUM_RESERVED_TC_CONNECT_RECORDS 3
310 #define ZNUM_RESERVED_UTIL_CONNECT_RECORDS 100
/* The error code sections below are grouped by the release that introduced */
/* each code, not by numeric order (for example 410 follows 465, and 400 */
/* is listed after 1234), so search the whole list before picking a new */
/* number. */
311 /* ------------------------------------------------------------------------- */
312 /* ERROR CODES ADDED IN VERSION 0.1 AND 0.2 */
313 /* ------------------------------------------------------------------------- */
314 #define ZNOT_FOUND 1 // Not an error code, a return value
315 #define ZNO_FREE_LQH_CONNECTION 414
316 #define ZGET_DATAREC_ERROR 418
317 #define ZGET_ATTRINBUF_ERROR 419
318 #define ZNO_FREE_FRAGMENTREC 460 // Insert new fragment error code
319 #define ZTAB_FILE_SIZE 464 // Insert new fragment error code + Start kernel
320 #define ZNO_ADD_FRAGREC 465 // Insert new fragment error code
321 /* ------------------------------------------------------------------------- */
322 /* ERROR CODES ADDED IN VERSION 0.3 */
323 /* ------------------------------------------------------------------------- */
324 #define ZTAIL_PROBLEM_IN_LOG_ERROR 410
325 #define ZGCI_TOO_LOW_ERROR 429 // GCP_SAVEREF error code
326 #define ZTAB_STATE_ERROR 474 // Insert new fragment error code
327 #define ZTOO_NEW_GCI_ERROR 479 // LCP Start error
328 /* ------------------------------------------------------------------------- */
329 /* ERROR CODES ADDED IN VERSION 0.4 */
330 /* ------------------------------------------------------------------------- */
331
332 #define ZNO_FREE_FRAG_SCAN_REC_ERROR 490 // SCAN_FRAGREF error code
333 #define ZCOPY_NO_FRAGMENT_ERROR 491 // COPY_FRAGREF error code
334 #define ZTAKE_OVER_ERROR 499
// Must be kept equal to the definition in Dbacc.hpp (see trailing comment).
335 #define ZTO_OP_STATE_ERROR 631 // Same as in Dbacc.hpp
336 #define ZCOPY_NODE_ERROR 1204
337 #define ZTOO_MANY_COPY_ACTIVE_ERROR 1208 // COPY_FRAG and COPY_ACTIVEREF code
338 #define ZCOPY_ACTIVE_ERROR 1210 // COPY_ACTIVEREF error code
339 #define ZNO_TC_CONNECT_ERROR 1217 // Simple Read + SCAN
340 #define ZTRANSPORTER_OVERLOADED_ERROR 1218
341 /* ------------------------------------------------------------------------- */
342 /* ERROR CODES ADDED IN VERSION 1.X */
/* 1219 is not defined: its #define below is commented out. */
343 /* ------------------------------------------------------------------------- */
344 //#define ZSCAN_BOOK_ACC_OP_ERROR 1219 // SCAN_FRAGREF error code
345 #define ZFILE_CHANGE_PROBLEM_IN_LOG_ERROR 1220
346 #define ZTEMPORARY_REDO_LOG_FAILURE 1221
347 #define ZNO_FREE_MARKER_RECORDS_ERROR 1222
348 #define ZNODE_SHUTDOWN_IN_PROGRESS 1223
349 #define ZTOO_MANY_FRAGMENTS 1224
350 #define ZTABLE_NOT_DEFINED 1225
351 #define ZDROP_TABLE_IN_PROGRESS 1226
352 #define ZINVALID_SCHEMA_VERSION 1227
353 #define ZTABLE_READ_ONLY 1233
354 #define ZREDO_IO_PROBLEM 1234
355
356 /* ------------------------------------------------------------------------- */
357 /* ERROR CODES ADDED IN VERSION 2.X */
358 /* ------------------------------------------------------------------------- */
359 #define ZNODE_FAILURE_ERROR 400
360 #define ZBAD_UNLOCK_STATE 416
361 #define ZBAD_OP_REF 417
362 /* ------------------------------------------------------------------------- */
363 /* ERROR CODES FROM ACC */
/* ZNO_TUPLE_FOUND (626) is the code the note on 899 further down */
/* recommends for new interpreted programs. */
364 /* ------------------------------------------------------------------------- */
365 #define ZNO_TUPLE_FOUND 626
366 #define ZTUPLE_ALREADY_EXIST 630
367 /* ------------------------------------------------------------------------- */
368 /* ERROR CODES FROM TUP */
369 /* ------------------------------------------------------------------------- */
370 /**
371 * 899 would be returned by an interpreted program such as a scan filter. New
372 * such programs should use 626 instead, but 899 will also be supported to
373 * remain backwards compatible. 899 is problematic since it is also used as
374 * "Rowid already allocated" (cf. ndberror.c).
375 */
376 #define ZUSER_SEARCH_CONDITION_FALSE_CODE 899
377 #endif
378
379 /**
380 * @class Dblqh
381 *
382 * @section secIntro Introduction
383 *
384 * Dblqh is the coordinator of the LDM. Dblqh is responsible for
385 * performing operations on tuples. It does this job with help of
386 * Dbacc block (that manages the index structures) and Dbtup
387 * (that manages the tuples).
388 *
389 * Dblqh also keeps track of the participants and acts as a coordinator of
390 * 2-phase commits. Logical redo logging is also handled by the Dblqh
391 * block.
392 *
393 * @section secModules Modules
394 *
395 * The code is partitioned into the following modules:
396 * - START / RESTART
397 * - Start phase 1: Load our block reference and our processor id
398 * - Start phase 2: Initiate all records within the block
399 * Connect LQH with ACC and TUP.
400 * - Start phase 4: Connect LQH with LQH. Connect every LQH with
401 * every LQH in the database system.
402 * If initial start, then create the fragment log files.
403 * If system restart or node restart,
404 * then open the fragment log files and
405 * find the end of the log files.
406 * - ADD / DELETE FRAGMENT<br>
407 * Used by dictionary to create new fragments and delete old fragments.
408 * - EXECUTION<br>
409 * handles the reception of lqhkeyreq and all processing
410 * of operations on behalf of this request.
411 * This does also involve reception of various types of attrinfo
412 * and keyinfo.
413 * It also involves communication with ACC and TUP.
414 * - LOG<br>
415 * The log module handles the reading and writing of the log.
416 * It is also responsible for handling system restart.
417 * It controls the system restart in TUP and ACC as well.
418 * - TRANSACTION<br>
419 * This module handles the commit and the complete phases.
420 * - MODULE TO HANDLE TC FAILURE<br>
421 * - SCAN<br>
422 * This module contains the code that handles a scan of a particular
423 * fragment.
424 * It operates under the control of TC and orders ACC to
425 * perform a scan of all tuples in the fragment.
426 * TUP performs the necessary search conditions
427 * to ensure that only valid tuples are returned to the application.
428 * - NODE RECOVERY<br>
429 * Used when a node has failed.
430 * It performs a copy of a fragment to a new replica of the fragment.
431 * It does also shut down all connections to the failed node.
432 * - LOCAL CHECKPOINT<br>
433 * Handles execution and control of LCPs
434 * It controls the LCPs in TUP and ACC.
435 * It also interacts with DIH to control which GCPs are recoverable.
436 * - GLOBAL CHECKPOINT<br>
437 * Helps DIH in discovering when GCPs are recoverable.
438 * It handles the request gcp_savereq that requests LQH to
439 * save a particular GCP to disk and respond when completed.
440 * - FILE HANDLING<br>
441 * With submodules:
442 * - SIGNAL RECEPTION
443 * - NORMAL OPERATION
444 * - FILE CHANGE
445 * - INITIAL START
446 * - SYSTEM RESTART PHASE ONE
447 * - SYSTEM RESTART PHASE TWO
448 * - SYSTEM RESTART PHASE THREE
449 * - SYSTEM RESTART PHASE FOUR
450 * - ERROR
451 * - TEST
452 * - LOG
453 */
454 class Dblqh
455 #ifndef DBLQH_STATE_EXTRACT
456 : public SimulatedBlock
457 #endif
458 {
459 friend class DblqhProxy;
460 friend class Backup;
461
462 public:
463 #ifndef DBLQH_STATE_EXTRACT
/**
 * Progress of a local checkpoint (LCP) with respect to its completion:
 * not running, running, or closing its files.
 */
464 enum LcpCloseState {
465 LCP_IDLE = 0,
466 LCP_RUNNING = 1, // LCP is running
467 LCP_CLOSE_STARTED = 2 // Completion(closing of files) has started
468 };
469
/**
 * Three-step progress indicator (idle, started, completed); going by the
 * enum name it tracks execution of the undo log.
 * The EULS_ prefix is the initials of ExecUndoLogState.
 */
470 enum ExecUndoLogState {
471 EULS_IDLE = 0,
472 EULS_STARTED = 1,
473 EULS_COMPLETED = 2
474 };
475
/**
 * Working record for an add-fragment request while it is in progress.
 *
 * Besides the add-fragment state it stores one instance each of the
 * create-table, LQH-fragment, add-attribute, drop-fragment and drop-table
 * request structures (presumably copies of the incoming signals, so the
 * same record can serve those operations too - TODO confirm).
 *
 * Member order defines the memory layout; do not reorder.
 */
476 struct AddFragRecord {
// Progress of the add-fragment request. The numeric values are not
// consecutive: 2, 4 and 8 are unused.
477 enum AddFragStatus {
478 FREE = 0,
479 ACC_ADDFRAG = 1,
480 WAIT_TUP = 3,
481 WAIT_TUX = 5,
482 WAIT_ADD_ATTR = 6,
483 TUP_ATTR_WAIT = 7,
484 TUX_ATTR_WAIT = 9
485 };
486 AddFragStatus addfragStatus;
// Reference to the fragment record being added (presumably an i-value into
// the fragment record pool - TODO confirm).
487 UintR fragmentPtr;
// Link to another AddFragRecord (presumably the free-list link - TODO confirm).
488 UintR nextAddfragrec;
// Connection references towards ACC, TUP and TUX respectively, going by the
// member names.
489 UintR accConnectptr;
490 UintR tupConnectptr;
491 UintR tuxConnectptr;
492
493 CreateTabReq m_createTabReq;
494 LqhFragReq m_lqhFragReq;
495 LqhAddAttrReq m_addAttrReq;
496 DropFragReq m_dropFragReq;
497 DropTabReq m_dropTabReq;
498
// 16-bit error code and attribute counters for the request.
499 Uint16 addfragErrorCode;
500 Uint16 attrSentToTup;
501 Uint16 attrReceived;
502 Uint16 totalAttrReceived;
503 Uint16 fragCopyCreation;
// NOTE(review): defValNextPos / defValSectionI look like a cursor into, and
// the i-value of, a section holding default values - confirm.
504 Uint16 defValNextPos;
505 Uint32 defValSectionI;
506 };
// Pool pointer (i-value plus object pointer) for AddFragRecord.
507 typedef Ptr<AddFragRecord> AddFragRecordPtr;
508
/**
 * State for one scan or copy-fragment operation (see ScanType).
 *
 * The constructor initialises only m_magic, scan_acc_index,
 * scan_acc_segments, nextHash, prevHash, scanTcWaiting, scanState, scanType
 * and m_reserved. Every other member is left uninitialised by the
 * constructor and must be set by the code that seizes the record before it
 * is read.
 *
 * nextPool/nextList, prevList, nextHash and prevHash, together with equal()
 * and hashValue(), are the linkage used by the pool, list and hash-table
 * typedefs declared right after this struct (presumably the interface
 * TransientPool, the DLC lists and DLHashTable expect - TODO confirm).
 *
 * Member order defines the memory layout and the constructor's
 * initialisation order; do not reorder.
 */
509 struct ScanRecord {
510 STATIC_CONST( TYPE_ID = RT_DBLQH_SCAN_RECORD);
// Record-type stamp, set to Magic::make(TYPE_ID) by the constructor.
511 Uint32 m_magic;
512
513 ScanRecord() :
514 m_magic(Magic::make(TYPE_ID)),
515 scan_acc_index(0),
516 scan_acc_segments(0),
517 nextHash(RNIL),
518 prevHash(RNIL),
519 scanTcWaiting(0),
520 scanState(SCAN_FREE),
521 scanType(ST_IDLE),
522 m_reserved(0)
523 {
524 }
525
526 ~ScanRecord()
527 {
528 }
529
// State of the scan. SCAN_FREE is the value a freshly constructed record
// gets; the *_COPY states presumably belong to copy-fragment operations
// (ScanType COPY) - TODO confirm.
530 enum ScanState {
531 SCAN_FREE = 0,
532 WAIT_NEXT_SCAN_COPY = 1,
533 WAIT_NEXT_SCAN = 2,
534 WAIT_ACC_COPY = 3,
535 WAIT_ACC_SCAN = 4,
536 WAIT_SCAN_NEXTREQ = 5,
537 WAIT_CLOSE_SCAN = 6,
538 WAIT_CLOSE_COPY = 7,
539 WAIT_TUPKEY_COPY = 8,
540 WAIT_LQHKEY_COPY = 9,
541 IN_QUEUE = 10,
542 COPY_FRAG_HALTED = 11,
543 WAIT_START_QUEUED_SCAN = 12,
544 QUIT_START_QUEUE_SCAN = 13
545 };
// What the record is used for. ST_IDLE is the value a freshly constructed
// record gets.
546 enum ScanType {
547 ST_IDLE = 0,
548 SCAN = 1,
549 COPY = 2
550 };
551
552 /* A single scan of each fragment can have MAX_PARALLEL_OP_PER_SCAN
553 * read operations in progress at one time
554 * We must store ACC ptrs for each read operation. They are stored
555 * in SegmentedSections linked in the array below.
556 * The main oddity is that the first element of scan_acc_op_ptr is
557 * an ACC ptr, but all others are refs to SectionSegments containing
558 * ACC ptrs.
559 */
// Number of array slots: enough SectionSegments to hold
// MAX_PARALLEL_OP_PER_SCAN entries (rounded up), plus one more slot
// (presumably for the directly stored first ACC ptr described above).
560 STATIC_CONST( MaxScanAccSegments= (
561 (MAX_PARALLEL_OP_PER_SCAN + SectionSegment::DataLength - 1) /
562 SectionSegment::DataLength) + 1);
563
564 UintR scan_acc_op_ptr[ MaxScanAccSegments ];
// Both counters start at 0 (set by the constructor).
565 Uint32 scan_acc_index;
566 Uint32 scan_acc_segments;
567 UintR scanApiOpPtr;
568 Local_key m_row_id;
569
// Batch limits and the amount accumulated so far, in rows and in bytes.
570 Uint32 m_max_batch_size_rows;
571 Uint32 m_max_batch_size_bytes;
572
573 Uint32 m_curr_batch_size_rows;
574 Uint32 m_curr_batch_size_bytes;
575
576 Uint32 m_exec_direct_batch_size_words;
577
// Defined outside this struct; going by the name it reports whether the
// current batch has reached its limits.
578 bool check_scan_batch_completed() const;
579
580 UintR copyPtr;
// nextPool and nextList share storage: a record is linked either in the
// pool or in a list through this word, never both at once.
581 union {
582 Uint32 nextPool;
583 Uint32 nextList;
584 };
585 Uint32 prevList;
// Hash chain links, RNIL after construction.
586 Uint32 nextHash;
587 Uint32 prevHash;
// Two records are equal when both scanNumber and fragPtrI match.
588 bool equal(const ScanRecord & key) const {
589 return scanNumber == key.scanNumber && fragPtrI == key.fragPtrI;
590 }
// Hash over the same two fields that equal() compares.
591 Uint32 hashValue() const {
592 return fragPtrI ^ scanNumber;
593 }
594
595 UintR scanAccPtr;
596 UintR scanAiLength;
597 UintR scanErrorCounter;
598 UintR scanSchemaVersion;
599 Uint32 scanTcWaiting; // When the request came from TC, 0 is no request
600
601 /**
602 * This is _always_ main table, even in range scan
603 * in which case scanTcrec->fragmentptr is different
 *
 * NOTE(review): this text reads as a description of fragPtrI (declared
 * second below), not of scan_check_lcp_stop - confirm and move the
 * comment next to the member it belongs to.
604 */
605 Uint32 scan_check_lcp_stop;
606 Uint32 fragPtrI;
607 UintR scanStoredProcId;
608 UintR scanTcrec;
609 BlockReference scanApiBlockref;
610 BlockReference scanBlockref;
611 ScanState scanState;
612 ScanType scanType;
613 NodeId scanNodeId;
614 Uint16 scanReleaseCounter;
// Together with fragPtrI this forms the hash key (see equal()).
615 Uint16 scanNumber;
616 Uint16 scan_lastSeen;
617
618 // scan source block, block object and function ACC TUX TUP
619 SimulatedBlock* scanBlock;
620 ExecFunction scanFunction_NEXT_SCANREQ;
621
// The remaining members are single-byte flags and small values, declared
// in groups of four.
622 Uint8 scanCompletedStatus;
623 Uint8 scanFlag;
624 Uint8 scanLockHold;
625 Uint8 scanLockMode;
626
627 Uint8 readCommitted;
628 Uint8 rangeScan;
629 Uint8 descending;
630 Uint8 tupScan;
631
632 Uint8 lcpScan;
633 Uint8 scanKeyinfoFlag;
634 Uint8 m_last_row;
// Set to 0 by the constructor.
635 Uint8 m_reserved;
636
637 Uint8 statScan;
638 Uint8 m_stop_batch;
639 Uint8 prioAFlag;
640 Uint8 m_first_match_flag;
641 };
// Pool index constant for the scan record pool (value 1); presumably the
// position of this pool among the block's transient pools - TODO confirm.
642 STATIC_CONST(DBLQH_SCAN_RECORD_TRANSIENT_POOL_INDEX = 1);
// Pointer, pool and container aliases for ScanRecord. Every list, fifo and
// hash type below is parameterised on ScanRecord_pool, so all of them link
// records through the next/prev members of ScanRecord itself.
643 typedef Ptr<ScanRecord> ScanRecordPtr;
644 typedef TransientPool<ScanRecord> ScanRecord_pool;
645 typedef DLCList<ScanRecord_pool> ScanRecord_list;
646 typedef LocalDLCList<ScanRecord_pool> Local_ScanRecord_list;
647 typedef DLCFifoList<ScanRecord_pool> ScanRecord_fifo;
648 typedef LocalDLCFifoList<ScanRecord_pool> Local_ScanRecord_fifo;
649 typedef DLHashTable<ScanRecord_pool> ScanRecord_hash;
650
651 // Configurable
// The pool all ScanRecord objects of this block are allocated from.
652 ScanRecord_pool c_scanRecordPool;
653 ScanRecord_list m_reserved_scans; // LCP + NR
// Hash of scan records; lookups use ScanRecord::equal() and hashValue(),
// i.e. the (scanNumber, fragPtrI) pair.
654 ScanRecord_hash c_scanTakeOverHash;
655
656
657 /**
658 * Constants for scan_direct_count
659 * Mainly used to keep the scheduling rules.
 *
 * Unlike the Z-constants at the top of this file, this macro is not
 * guarded by DBLQH_C. Although it is written inside the class body, it is
 * a preprocessor macro and therefore not scoped to class Dblqh: it is
 * visible to everything that follows this point in the translation unit.
660 */
661 #define ZMAX_SCAN_DIRECT_COUNT 16
662
663 struct Fragrecord {
FragrecordDblqh::Fragrecord664 Fragrecord() {}
665
666 enum ExecSrStatus {
667 IDLE = 0,
668 ACTIVE = 2
669 };
670 /**
671 * Possible state transitions are:
672 * - FREE -> DEFINED Fragment record is allocated
673 * - DEFINED -> ACTIVE Add fragment is completed and
674 * fragment is ready to
675 * receive operations.
676 * - DEFINED -> ACTIVE_CREATION Add fragment is completed and
677 * fragment is ready to
678 * receive operations in parallel
679 * with a copy fragment
680 * which is performed from the
681 * primary replica
682 * - DEFINED -> CRASH_RECOVERING A fragment is ready to be
683 * recovered from a local
684 * checkpoint on disk
685 * - ACTIVE -> REMOVING A fragment is removed from the node
686 * - CRASH_RECOVERING -> ACTIVE A fragment has been recovered and
687 * is now ready for
688 * operations again.
689 * - CRASH_RECOVERING -> REMOVING Fragment recovery failed or
690 * was cancelled.
691 * - ACTIVE_CREATION -> ACTIVE A fragment is now copied and now
692 * is a normal fragment
693 * - ACTIVE_CREATION -> REMOVING Copying of the fragment failed
694 * - REMOVING -> FREE Removing of the fragment is
695 * completed and the fragment
696 * is now free again.
697 */
698 enum FragStatus {
699 FREE = 0, ///< Fragment record is currently not in use
700 FSACTIVE = 1, ///< Fragment is defined and usable for operations
701 DEFINED = 2, ///< Fragment is defined but not yet usable by
702 ///< operations
703 ACTIVE_CREATION = 3, ///< Fragment is defined and active but is under
704 ///< creation by the primary LQH.
705 CRASH_RECOVERING = 4, ///< Fragment is recovering after a crash by
706 ///< executing the fragment log and so forth.
707 ///< Will need further breakdown.
708 REMOVING = 5 ///< The fragment is currently removed.
709 ///< Operations are not allowed.
710 };
711 enum LogFlag {
712 STATE_TRUE = 0,
713 STATE_FALSE = 1
714 };
715 enum SrStatus {
716 SS_IDLE = 0,
717 SS_STARTED = 1,
718 SS_COMPLETED = 2
719 };
720 enum LcpFlag {
721 LCP_STATE_TRUE = 0,
722 LCP_STATE_FALSE = 1
723 };
724 /**
725 * Last GCI for executing the fragment log in this phase.
726 */
727 UintR execSrLastGci[4];
728 /**
729 * Start GCI for executing the fragment log in this phase.
730 */
731 UintR execSrStartGci[4];
732 /**
733 * Requesting user pointer for executing the fragment log in
734 * this phase
735 */
736 UintR execSrUserptr[4];
737 /**
738 * The LCP identifier of the LCP's.
739 * =0 means that the LCP number has not been stored.
740 * The LCP identifier is supplied by DIH when starting the LCP.
741 */
742 UintR lcpId[MAX_LCP_STORED];
743 UintR maxGciInLcp;
744 /**
745 * This variable contains the maximum global checkpoint
746 * identifier that exists in a certain local checkpoint.
747 * Maximum 4 local checkpoints is possible in this release.
748 */
749 UintR maxGciCompletedInLcp;
750 UintR srLastGci[4];
751 UintR srStartGci[4];
752 /**
753 * The fragment pointers in ACC
754 */
755 UintR accFragptr;
756 /**
757 * The EXEC_SR variables are used to keep track of which fragments
758 * are interested in being executed as part of executing the
759 * fragment log.
760 * It is initialised for every phase of executing the
761 * fragment log (the fragment log can be executed upto four times).
762 *
763 * Each execution is capable of executing the log records on four
764 * fragment replicas.
765 */
766 /**
767 * Requesting block reference for executing the fragment log
768 * in this phase.
769 */
770 BlockReference execSrBlockref[4];
771 /**
772 * This variable contains references to active scan and copy
773 * fragment operations on the fragment.
774 * A maximum of four concurrently active is allowed.
775 */
776
777 typedef Bitmask<8> ScanNumberMask; // Max 255 KeyInfo20::ScanNo
778 ScanNumberMask m_scanNumberMask;
779 ScanRecord_list::Head m_activeScans;
780 ScanRecord_fifo::Head m_queuedScans;
781 ScanRecord_fifo::Head m_queuedTupScans;
782 ScanRecord_fifo::Head m_queuedAccScans;
783
784 Uint16 srLqhLognode[4];
785 /**
786 * The fragment pointers in TUP and TUX
787 */
788 UintR tupFragptr;
789 UintR tuxFragptr;
790
791 /**
792 * This variable keeps track of how many operations that are
793 * active that have skipped writing the log but not yet committed
794 * or aborted. This is used during start of fragment.
795 */
796 UintR activeTcCounter;
797
798 /**
799 * This status specifies whether this fragment is actively
800 * engaged in executing the fragment log.
801 */
802 ExecSrStatus execSrStatus;
803
804 /**
805 * The fragment id of this fragment.
806 */
807 UintR fragId;
808
809 /**
810 * Status of fragment
811 */
812 FragStatus fragStatus;
813
814 /**
815 * 0 = undefined i.e fragStatus != ACTIVE_CREATION
816 * 1 = yes
817 * 2 = no
818 */
819 enum ActiveCreat {
820 AC_NORMAL = 0, // fragStatus != ACTIVE_CREATION
821 AC_IGNORED = 1, // Operation that got ignored during NR
822 AC_NR_COPY = 2 // Operation that got performed during NR
823 };
824 Uint8 m_copy_started_state;
825
826 /**
827 * This flag indicates whether logging is currently activated at
828 * the fragment.
829 * During a system restart it is temporarily shut off.
830 * Some fragments have it permanently shut off.
831 */
832 LogFlag logFlag;
833 UintR masterPtr;
834 /**
835 * This variable contains the maximum global checkpoint identifier
836 * which was completed when the local checkpoint was started.
837 */
838 /**
839 * Reference to the next fragment record in a free list of fragment
840 * records.
841 */
842 union {
843 Uint32 nextPool;
844 Uint32 nextList;
845 };
846 Uint32 prevList;
847
848 /**
849 * The newest GCI that has been committed on fragment
850 */
851 UintR newestGci;
852 Uint32 m_completed_gci;
853 SrStatus srStatus;
854 UintR srUserptr;
855 /**
856 * The global checkpoint when table was created for this fragment.
857 */
858 UintR startGci;
859 /**
860 * A reference to the table owning this fragment.
861 */
862 UintR tabRef;
863
864 /**
865 * The block reference to ACC on the fragment makes it
866 * possible to have different ACC blocks for different
867 * fragments in the future.
868 */
869 BlockReference accBlockref;
870
871 /**
872 * Ordered index block.
873 */
874 BlockReference tuxBlockref;
875 /**
876 * The master block reference as sent in COPY_ACTIVEREQ.
877 */
878 BlockReference masterBlockref;
879 /**
880 * These variables are used during system restart to recall
881 * from which node to execute the fragment log and which GCI's
882 * this node should start and stop from. Also to remember who
883 * to send the response to when system restart is completed.
884 */
885 BlockReference srBlockref;
886 /**
887 * The block reference to TUP on the fragment makes it
888 * possible to have different TUP blocks for different
889 * fragments in the future.
890 */
891 BlockReference tupBlockref;
892 /**
893 * This state indicates if the fragment will participate in a
894 * checkpoint.
895 * Temporary tables with Fragrecord::logFlag permanently off
896 * will also have Fragrecord::lcpFlag off.
897 */
898 LcpFlag lcpFlag;
899 /**
900 * Used to ensure that updates started with old
901 * configuration do not arrive here after the copy fragment
902 * has started.
903 * If they are allowed to arrive after they
904 * could update a record that has already been replicated to
905 * the new node. This type of arrival should be extremely
906 * rare but we must anyway ensure that no harm is done.
907 */
908 Uint16 copyNode;
909 /**
910 * Instance key for fast access.
911 */
912 Uint16 lqhInstanceKey;
913 /**
914 * The number of fragment replicas that will execute the log
915 * records in this round of executing the fragment
916 * log. Maximum four is possible.
917 */
918 Uint8 execSrNoReplicas;
919 /**
920 * This variable contains what type of replica this fragment
921 * is. Two types are possible:
922 * - Primary/Backup replica = 0
923 * - Stand-by replica = 1
924 *
925 * It is not possible to distinguish between primary and
926 * backup on a fragment.
927 * This can only be done per transaction.
928 * DIH can change from primary to backup without informing
929 * the various replicas about this change.
930 */
931 Uint8 fragCopy;
932 /**
933 * This is the last fragment distribution key that we have
934 * heard of.
935 */
936 Uint8 fragDistributionKey;
937 /**
938 * How many local checkpoints does the fragment contain
939 */
940 Uint16 srChkpnr;
941 Uint8 srNoLognodes;
942 /**
943 * Table type.
944 */
945 Uint8 tableType;
946 /**
947 * For ordered index fragment, i-value of corresponding
948 * fragment in primary table.
949 */
950 UintR tableFragptr;
951 /**
952 * The GCI when the table was created
953 */
954 Uint32 createGci;
955
956 /**
957 * Log part
958 */
959 Uint32 m_log_part_ptr_i;
/**
 * Execution state of the LCP_FRAG_ORD belonging to this fragment; part
 * of the information kept for the c_queued_lcp_frag_ord queue.
 */
enum LcpExecutionState
{
  LCP_QUEUED                   = 0,
  LCP_EXECUTING                = 1,
  LCP_EXECUTED                 = 2,
  LCP_EXECUTED_BY_CREATE_TABLE = 3
};
970
971 /*
972 Usage counters. Except for m_queuedScanCount, these only count 'user'
973 operations, i.e. those directly initiated from the ndbapi, and not
974 'internal' operations, such as those used for LCPs.
975 */
976 struct UsageStat
977 {
978 // Number of key read operations.
979 Uint64 m_readKeyReqCount;
980
981 // Number of inserts.
982 Uint64 m_insKeyReqCount;
983
984 // Number of updates.
985 Uint64 m_updKeyReqCount;
986 /*
987 Number of write operations, meaning 'update' if key exists, and 'insert'
988 otherwise.
989 */
990 Uint64 m_writeKeyReqCount;
991
992 // Number of deletes
993 Uint64 m_delKeyReqCount;
994
995 /*
996 Number of key operations refused by the LDM due to either:
997 - no matching key for update/delete.
998 - key exists already for insert.
999 - operation rejected by interpreted program.
1000 */
1001 Uint64 m_keyRefCount;
1002
1003 // Number of attrinfo words in key operations.
1004 Uint64 m_keyReqAttrWords;
1005
1006 // Number of keyinfo words in key operations.
1007 Uint64 m_keyReqKeyWords;
1008
1009 // Total size of interpeter programs for key operations.
1010 Uint64 m_keyProgramWords;
1011
1012 // Number of interpreter instructions executed for key operations.
1013 Uint64 m_keyInstructionCount;
1014
1015 // Number of words returned to client due to key operations.
1016 Uint64 m_keyReqWordsReturned;
1017
1018 // Number of fragment scans requested.
1019 Uint64 m_scanFragReqCount;
1020
1021 /*
1022 The number of rows examined during scans. Some of these may have been
1023 rejected by the interpreted program (i.e. a pushed condition), and
1024 thus not been returned to the client.
1025 */
1026 Uint64 m_scanRowsExamined;
1027
1028 // Number of scan rows returned to the client.
1029 Uint64 m_scanRowsReturned;
1030
1031 // Number of words returned to client due to scans.
1032 Uint64 m_scanWordsReturned;
1033
1034 // Total size of interpeter programs for scans.
1035 Uint64 m_scanProgramWords;
1036
1037 // Total size of scan bounds (for ordered index scans).
1038 Uint64 m_scanBoundWords;
1039
1040 // Number of interpreter instructions executed for scans.
1041 Uint64 m_scanInstructionCount;
1042
1043 // Total number of scans queued (including those from internal clients.
1044 Uint64 m_queuedScanCount;
1045
1046 // Set all counters to zero.
initDblqh::Fragrecord::UsageStat1047 void init()
1048 {
1049 memset(this, 0, sizeof *this);
1050 }
1051 };
1052 Uint32 lcp_frag_ord_lcp_no;
1053 Uint32 lcp_frag_ord_lcp_id;
1054
1055 /**
 * Fragment was inserted into LCP fragment queue by a CREATE TABLE
1057 * statement. This variable is set to true only when this insertion
1058 * happens when no LCP is ongoing.
1059 */
1060 bool m_create_table_insert_lcp;
1061 /**
1062 * This variable is always set when inserted into LCP fragment queue by
1063 * a CREATE TABLE statement. It is cleared when a LCP_FRAG_ORD is received
1064 * for the fragment. Thus by checking this variable at completion of
1065 * fragment LCP we know whether to report LCP_FRAG_REP or not.
1066 */
1067 bool m_create_table_flag_lcp_frag_ord;
1068
1069 LcpExecutionState lcp_frag_ord_state;
1070 UsageStat m_useStat;
1071 Uint8 m_copy_complete_flag;
1072 /**
1073 * To keep track of which fragment have started the
1074 * current local LCP we have a value of 0 or 1. If
1075 * current local LCP is 0 the fragment will have 0
1076 * to indicate it has been started and 1 indicating
1077 * that it hasn't started yet.
1078 * The value is initialised to 0 and the value of the
1079 * first local LCP is 1.
1080 */
1081 Uint8 m_local_lcp_instance_started;
1082 };
/* Ptr<> handle to a Fragrecord. */
typedef Ptr<Fragrecord> FragrecordPtr;
/* ArrayPool holding the Fragrecord entries. */
typedef ArrayPool<Fragrecord> Fragrecord_pool;
/* SLList over Fragrecord_pool (presumably linked through nextList -- confirm in IntrusiveList.hpp). */
typedef SLList<Fragrecord_pool> Fragrecord_list;
/* DLFifoList over Fragrecord_pool (presumably linked through nextList/prevList -- confirm in IntrusiveList.hpp). */
typedef DLFifoList<Fragrecord_pool> Fragrecord_fifo;
1087
1088 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1089 /* $$$$$$$ GLOBAL CHECKPOINT RECORD $$$$$$ */
1090 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1091 /**
1092 * This record describes a global checkpoint that is
1093 * completed. It waits for all log records belonging to this
1094 * global checkpoint to be saved on disk.
1095 */
1096 struct GcpRecord {
1097 /**
1098 * The file number within each log part where the log was
1099 * located when gcp_savereq was received. The last record
1100 * belonging to this global checkpoint is certainly before
1101 * this place in the log. We could come even closer but it
1102 * would cost performance and doesn't seem like a good
1103 * idea. This is simple and it works.
1104 */
1105 Uint16 gcpFilePtr[NDB_MAX_LOG_PARTS];
1106 /**
1107 * The page number within the file for each log part.
1108 */
1109 Uint16 gcpPageNo[NDB_MAX_LOG_PARTS];
1110 /**
1111 * The word number within the last page that was written for
1112 * each log part.
1113 */
1114 Uint16 gcpWordNo[NDB_MAX_LOG_PARTS];
1115 /**
1116 * The identity of this global checkpoint.
1117 */
1118 UintR gcpId;
1119 /**
1120 * The state of this global checkpoint, one for each log part.
1121 */
1122 Uint8 gcpLogPartState[NDB_MAX_LOG_PARTS];
1123 /**
1124 * The sync state of this global checkpoint, one for each
1125 * log part.
1126 */
1127 Uint8 gcpSyncReady[NDB_MAX_LOG_PARTS];
1128 /**
1129 * User pointer of the sender of gcp_savereq (= master DIH).
1130 */
1131 UintR gcpUserptr;
1132 /**
1133 * Block reference of the sender of gcp_savereq
1134 * (= master DIH).
1135 */
1136 BlockReference gcpBlockref;
1137 }; // Size 44 bytes
1138 typedef Ptr<GcpRecord> GcpRecordPtr;
1139
1140 struct HostRecord {
1141 struct PackedWordsContainer lqh_pack[MAX_NDBMT_LQH_THREADS+1];
1142 struct PackedWordsContainer tc_pack[MAX_NDBMT_TC_THREADS+1];
1143 Uint8 inPackedList;
1144 Uint8 nodestatus;
1145 };
1146 typedef Ptr<HostRecord> HostRecordPtr;
1147
1148 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1149 /* $$$$$$ LOCAL CHECKPOINT SUPPORT RECORD $$$$$$$ */
1150 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1151 /**
1152 * This record contains the information about an outstanding
1153 * request to TUP or ACC. Used for both local checkpoints and
1154 * system restart.
1155 */
1156 struct LcpLocRecord {
1157 enum LcpLocstate {
1158 IDLE = 0,
1159 WAIT_TUP_PREPLCP = 1,
1160 WAIT_LCPHOLDOP = 2,
1161 HOLDOP_READY = 3,
1162 ACC_WAIT_STARTED = 4,
1163 ACC_STARTED = 5,
1164 ACC_COMPLETED = 6,
1165 TUP_WAIT_STARTED = 7,
1166 TUP_STARTED = 8,
1167 TUP_COMPLETED = 9,
1168 SR_ACC_STARTED = 10,
1169 SR_TUP_STARTED = 11,
1170 SR_ACC_COMPLETED = 12,
1171 SR_TUP_COMPLETED = 13
1172 };
1173 LcpLocstate lcpLocstate;
1174 Uint32 lcpRef;
1175 }; // 28 bytes
1176 typedef Ptr<LcpLocRecord> LcpLocRecordPtr;
1177
1178 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1179 /* $$$$$$$ LOCAL CHECKPOINT RECORD $$$$$$$ */
1180 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1181 /**
1182 * This record contains the information about a local
1183 * checkpoint that is ongoing. This record is also used as a
1184 * system restart record.
1185 */
1186 struct LcpRecord
1187 {
1188 enum LcpState {
1189 LCP_IDLE = 0,
1190 LCP_COMPLETED = 1,
1191 LCP_PREPARING = 2,
1192 LCP_PREPARED = 3,
1193 LCP_CHECKPOINTING = 4
1194 };
1195
1196 LcpState lcpPrepareState;
1197 LcpState lcpRunState;
1198 bool firstFragmentFlag;
1199 bool lastFragmentFlag;
1200
1201 /**
1202 * This variable is set to true when starting a new LCP AND
1203 * there are fragments inserted into the LCP queue already.
1204 * Those have been inserted by CREATE TABLE statements and
1205 * need to be executed fully before any other fragments have
1206 * their LCPs executed.
1207 */
1208 bool m_early_lcps_need_synch;
1209
1210 /**
1211 * This is set to true when sending WAIT_LCP_IDLE_REQ and
1212 * cleared again when WAIT_LCP_IDLE_CONF is received. It ensures
1213 * that we don't start any new fragment LCPs while we are waiting
1214 * for the previous ones to be completed.
1215 */
1216 bool m_wait_early_lcp_synch;
1217
1218 struct FragOrd {
1219 Uint32 fragPtrI;
1220 LcpFragOrd lcpFragOrd;
1221 };
1222 FragOrd currentPrepareFragment;
1223 FragOrd currentRunFragment;
1224
1225 Uint32 m_outstanding;
1226
1227 Uint64 m_no_of_records;
1228 Uint64 m_no_of_bytes;
1229 };
1230 typedef Ptr<LcpRecord> LcpRecordPtr;
1231
1232 struct IOTracker
1233 {
1234 STATIC_CONST( SAMPLE_TIME = 128 ); // millis
1235 STATIC_CONST( SLIDING_WINDOW_LEN = 1024 ); // millis
1236 STATIC_CONST( SLIDING_WINDOW_HISTORY_LEN = 8 );
1237
1238 void init(Uint32 partNo);
1239 Uint32 m_log_part_no;
1240 Uint32 m_current_time;
1241
1242 /**
1243 * Keep sliding window of measurement
1244 */
1245 Uint32 m_save_pos; // current pos in array
1246 Uint32 m_save_written_bytes[SLIDING_WINDOW_HISTORY_LEN];
1247 Uint32 m_save_elapsed_millis[SLIDING_WINDOW_HISTORY_LEN];
1248
1249 /**
1250 * Current sum of sliding window
1251 */
1252 Uint32 m_curr_elapsed_millis;
1253 Uint64 m_curr_written_bytes;
1254
1255 /**
1256 * Currently outstanding bytes
1257 */
1258 Uint64 m_sum_outstanding_bytes;
1259
1260 /**
1261 * How many times did we pass lag-threshold
1262 */
1263 Uint32 m_lag_cnt;
1264
1265 /**
1266 * How many seconds of writes are we lagging
1267 */
1268 Uint32 m_lag_in_seconds;
1269
1270 /**
1271 * bytes send during current sample
1272 */
1273 Uint64 m_sample_sent_bytes;
1274
1275 /**
1276 * bytes completed during current sample
1277 */
1278 Uint64 m_sample_completed_bytes;
1279
1280 /**
1281 * bytes completed since last report
1282 */
1283 Uint64 m_redo_written_bytes;
1284
1285 int tick(Uint32 now, Uint32 maxlag, Uint32 maxlag_cnt);
1286 void send_io(Uint32 bytes);
1287 void complete_io(Uint32 bytes);
get_lag_cntDblqh::IOTracker1288 Uint32 get_lag_cnt()
1289 {
1290 return m_lag_cnt;
1291 }
get_lag_in_secondsDblqh::IOTracker1292 Uint32 get_lag_in_seconds()
1293 {
1294 return m_lag_in_seconds;
1295 }
get_and_reset_redo_written_bytesDblqh::IOTracker1296 Uint64 get_and_reset_redo_written_bytes()
1297 {
1298 Uint64 redo_written_bytes = m_redo_written_bytes;
1299 m_redo_written_bytes = 0;
1300 return redo_written_bytes;
1301 }
1302 };
/* NOTE(review): presumably remembers that an IO lag has already been reported -- confirm in the implementation. */
bool c_is_io_lag_reported;
/* Returns bool; presumably true when redo IO of this LDM instance is lagging -- confirm in the implementation. */
bool is_ldm_instance_io_lagging();
/* Returns Uint64; presumably the redo bytes written since the last report (cf. IOTracker::get_and_reset_redo_written_bytes) -- confirm. */
Uint64 report_redo_written_bytes();
1306
1307 /**
1308 * RedoWorkStats
1309 *
1310 * Structure for tracking the work performed to recover
1311 * from redo
1312 */
1313 class RedoWorkStats
1314 {
1315 public:
1316 Uint64 m_pagesRead;
1317
1318 Uint64 m_opsPrepared;
1319 Uint64 m_opsSkipped;
1320 Uint64 m_opsExecuted;
1321 Uint64 m_bytesExecuted;
1322 Uint32 m_gcisExecuted;
1323
RedoWorkStats()1324 RedoWorkStats()
1325 :m_pagesRead(0),
1326 m_opsSkipped(0),
1327 m_opsExecuted(0),
1328 m_bytesExecuted(0),
1329 m_gcisExecuted(0)
1330 {}
1331 };
1332
1333 /**
1334 * LCPFragWatchdog
1335 *
1336 * Structure tracking state of LCP fragment watchdog.
1337 * This watchdog polls the state of the current LCP fragment
1338 * scan to ensure that forward progress is maintained at
1339 * a minimal rate.
1340 * It only continues running while this LQH instance
1341 * thinks a fragment scan is ongoing
1342 */
1343 struct LCPFragWatchdog
1344 {
1345 STATIC_CONST( PollingPeriodMillis = 1000 ); /* 10s */
1346 Uint32 WarnElapsedWithNoProgressMillis; /* LCP Warn, milliseconds */
1347 Uint32 MaxElapsedWithNoProgressMillis; /* LCP Fail, milliseconds */
1348
1349 SimulatedBlock* block;
1350
1351 /* Should the watchdog be running? */
1352 bool scan_running;
1353
1354 /* Is there an active thread? */
1355 bool thread_active;
1356
1357 /* LCP position and state info from Backup block */
1358 LcpStatusConf::LcpState lcpState;
1359 Uint32 tableId;
1360 Uint32 fragId;
1361 Uint64 completionStatus;
1362 Uint32 lcpScannedPages;
1363
1364 /* Total elapsed milliseconds with no LCP progress observed */
1365 Uint32 elapsedNoProgressMillis; /* milliseconds */
1366 NDB_TICKS lastChecked; /* Last time LCP progress checked */
1367
1368 /* Reinitialise the watchdog */
1369 void reset();
1370
1371 /* Handle an LCP Status report */
1372 void handleLcpStatusRep(LcpStatusConf::LcpState repLcpState,
1373 Uint32 repTableId,
1374 Uint32 repFragId,
1375 Uint64 repCompletionStatus,
1376 Uint32 repLcpScannedPages);
1377 };
1378
1379 LCPFragWatchdog c_lcpFragWatchdog;
1380
1381
1382 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1383 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1384 /* */
1385 /* THE RECORDS THAT START BY LOG_ ARE A PART OF THE LOG MANAGER. */
1386 /* THESE RECORDS ARE USED TO HANDLE THE FRAGMENT LOG. */
1387 /* */
1388 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1389 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1390 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1391 /* $$$$$$$ LOG RECORD $$$$$$$ */
1392 /* */
1393 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1394 /* THIS RECORD IS ALIGNED TO BE 256 BYTES. */
1395 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1396 /**
1397 * This record describes the current state of a log.
1398 * A log consists of a number of log files.
1399 * These log files are described by the log file record.
1400 *
1401 * There will be 4 sets of log files.
1402 * Different tables will use different log files dependent
1403 * on the table id.
1404 * This ensures that more than one outstanding request can
1405 * be sent to the file system.
1406 * The log file to use is found by performing a very simple hash
1407 * function.
1408 */
1409 struct LogPartRecord {
1410 enum LogPartState {
1411 IDLE = 0, ///< Nothing happens at the moment
1412 ACTIVE = 1, ///< An operation is active logging
1413 SR_FIRST_PHASE = 2, ///< Finding the end of the log and
1414 ///< the information about global
1415 ///< checkpoints in the log is ongoing.
1416 SR_FIRST_PHASE_COMPLETED = 3, ///< First phase completed
1417 SR_THIRD_PHASE_STARTED = 4, ///< Executing fragment log is in 3rd ph
1418 SR_THIRD_PHASE_COMPLETED = 5,
1419 SR_FOURTH_PHASE_STARTED = 6, ///< Finding the log tail and head
1420 ///< is the fourth phase.
1421 SR_FOURTH_PHASE_COMPLETED = 7
1422 };
1423 enum WaitWriteGciLog {
1424 WWGL_TRUE = 0,
1425 WWGL_FALSE = 1
1426 };
1427 enum LogExecState {
1428 LES_IDLE = 0,
1429 LES_SEARCH_STOP = 1,
1430 LES_SEARCH_START = 2,
1431 LES_EXEC_LOG = 3,
1432 LES_EXEC_LOG_NEW_MBYTE = 4,
1433 LES_EXEC_LOG_NEW_FILE = 5,
1434 LES_EXEC_LOGREC_FROM_FILE = 6,
1435 LES_EXEC_LOG_COMPLETED = 7,
1436 LES_WAIT_READ_EXEC_SR_NEW_MBYTE = 8,
1437 LES_WAIT_READ_EXEC_SR = 9,
1438 LES_EXEC_LOG_INVALIDATE = 10
1439 };
1440
1441 Uint64 m_total_written_words;
1442 Uint64 m_last_total_written_words;
1443
1444 /**
1445 * Keep track of number of words that will eventually have to
1446 * be written to the REDO log as COMMIT log messages and
1447 * ABORT log messages. This ensures that we won't run out
1448 * of REDO log in COMMIT and ABORT processing.
1449 */
1450 Uint64 m_committed_words;
1451
1452 /**
1453 * Is a CONTINUEB(ZLOG_LQHKEYREQ) signal sent and
1454 * outstanding. We do not want several instances of this
1455 * signal out in the air since that would create multiple
1456 * writers of the list.
1457 */
1458 UintR LogLqhKeyReqSent;
1459 /**
1460 * Contains the current log file where log records are
1461 * written. During system restart it is used to indicate the
1462 * last log file.
1463 */
1464 UintR currentLogfile;
1465 /**
1466 * The log file used to execute log records from far behind.
1467 */
1468 UintR execSrExecLogFile;
1469 /**
1470 * The currently executing prepare record starts in this log
1471 * page. This variable is used to enable that a log record is
1472 * executed multiple times in execution of the log.
1473 */
1474 UintR execSrLogPage;
1475 /**
1476 * This variable keeps track of the lfo record where the
1477 * pages that were read from disk when an operations log
1478 * record were not found in the main memory buffer for log
1479 * pages.
1480 */
1481 UintR execSrLfoRec;
1482 /**
1483 * The starting page number when reading log from far behind.
1484 */
1485 UintR execSrStartPageNo;
1486 /**
1487 * The last page number when reading log from far behind.
1488 */
1489 UintR execSrStopPageNo;
1490 /**
1491 * Contains a reference to the first log file, file number 0.
1492 */
1493 UintR firstLogfile;
1494 /**
1495 * This variable contains the oldest operation in this log
1496 * part which have not been committed yet.
1497 */
1498 UintR firstLogTcrec;
1499 /**
1500 * The first reference to a set of 8 pages. These are used
1501 * during execution of the log to keep track of which pages
1502 * are in memory and which are not.
1503 */
1504 UintR firstPageRef;
1505 /**
1506 * This variable contains the global checkpoint record
1507 * waiting for disk writes to complete.
1508 */
1509 UintR gcprec;
1510 /**
1511 * The last reference to a set of 8 pages. These are used
1512 * during execution of the log to keep track of which pages
1513 * are in memory and which are not.
1514 */
1515 UintR lastPageRef;
1516
1517 struct OperationQueue
1518 {
initDblqh::LogPartRecord::OperationQueue1519 void init() { firstElement = lastElement = RNIL;}
isEmptyDblqh::LogPartRecord::OperationQueue1520 bool isEmpty() const { return firstElement == RNIL; }
1521 Uint32 firstElement;
1522 Uint32 lastElement;
1523 };
1524
1525 /**
1526 * operations queued waiting on REDO to prepare
1527 */
1528 struct OperationQueue m_log_prepare_queue;
1529
1530 /**
1531 * operations queued waiting on REDO to commit/abort
1532 */
1533 struct OperationQueue m_log_complete_queue;
1534
1535 /**
1536 * This variable contains the newest operation in this log
1537 * part which have not been committed yet.
1538 */
1539 UintR lastLogTcrec;
1540 /**
1541 * This variable indicates which was the last mbyte that was
1542 * written before the system crashed. Discovered during
1543 * system restart.
1544 */
1545 UintR lastLogfile;
1546 /**
1547 * This variable is used to keep track of the state during
1548 * the third phase of the system restart, i.e. when
1549 * LogPartRecord::logPartState ==
1550 * LogPartRecord::SR_THIRD_PHASE_STARTED.
1551 */
1552 LogExecState logExecState;
1553 /**
1554 * This variable contains the lap number of this log part.
1555 */
1556 UintR logLap;
1557 /**
1558 * This variable contains the place to stop executing the log
1559 * in this phase.
1560 */
1561 UintR logLastGci;
1562 /**
1563 * This variable contains the place to start executing the
1564 * log in this phase.
1565 */
1566 UintR logStartGci;
1567 /**
1568 * The latest GCI completed in this log part.
1569 */
1570 UintR logPartNewestCompletedGCI;
1571 /**
1572 * The current state of this log part.
1573 */
1574 LogPartState logPartState;
1575
1576 /**
1577 * does current log-part have tail-problem (i.e 410)
1578 */
1579 enum {
1580 P_TAIL_PROBLEM = 0x1,// 410
1581 P_REDO_IO_PROBLEM = 0x2,// 1234
1582 P_FILE_CHANGE_PROBLEM = 0x4 // 1220
1583 };
1584 Uint32 m_log_problems;
1585
1586 /**
1587 * A timer that is set every time a log page is sent to disk.
1588 * Ensures that log pages are not kept in main memory for
1589 * more than a certain time.
1590 */
1591 UintR logPartTimer;
1592 /**
1593 * The current timer which is set by the periodic signal
1594 * received by LQH
1595 */
1596 UintR logTimer;
1597 /**
1598 * Contains the number of the log tail file and the mbyte
1599 * reference within that file. This information ensures that
1600 * the tail is not overwritten when writing new log records.
1601 */
1602 UintR logTailFileNo;
1603 /**
1604 * The TcConnectionrec used during execution of this log part.
1605 */
1606 UintR logTcConrec;
1607 /**
1608 * The number of pages that currently resides in the main
1609 * memory buffer. It does not refer pages that are currently
1610 * read from the log files. Only to pages already read
1611 * from the log file.
1612 */
1613 UintR mmBufferSize;
1614 /**
1615 * Contains the current number of log files in this log part.
1616 */
1617 UintR noLogFiles;
1618 /**
1619 * This variable is used only during execution of a log
1620 * record. It keeps track of in which page record a log
1621 * record was started. It is used then to deduce which
1622 * pages that are dirty after that the log records on the
1623 * page have been executed.
1624 *
1625 * It is also used to find out where to write the invalidate
1626 * command when that is needed.
1627 */
1628 UintR prevLogpage;
1629 union {
1630 /**
1631 * The number of files remaining to gather GCI information
1632 * for during system restart. Only used if number of files
1633 * is larger than 60.
1634 */
1635 UintR srRemainingFiles;
1636
1637 /**
1638 * The index of the file which we should start loading redo
1639 * meta information from after the 'FRONTPAGE' file has been
1640 * closed.
1641 */
1642 UintR srLastFileIndex;
1643 };
1644 /**
1645 * The log file where to start executing the log during
1646 * system restart.
1647 */
1648 UintR startLogfile;
1649 /**
1650 * The last log file in which to execute the log during system
1651 * restart.
1652 */
1653 UintR stopLogfile;
1654 /**
1655 * This variable keeps track of when we want to write a complete
1656 * gci log record but have been blocked by an ongoing log operation.
1657 */
1658 WaitWriteGciLog waitWriteGciLog;
1659 /**
1660 * The currently executing prepare record starts in this index
1661 * in the log page.
1662 */
1663 Uint16 execSrLogPageIndex;
1664 /**
1665 * Which of the four exec_sr's in the fragment is currently executing
1666 */
1667 Uint16 execSrExecuteIndex;
1668 /**
1669 * The number of pages executed in the current mbyte.
1670 */
1671 Uint16 execSrPagesExecuted;
1672 /**
1673 * The number of pages read from disk that have arrived and are
1674 * currently awaiting execution of the log.
1675 */
1676 Uint16 execSrPagesRead;
1677 /**
1678 * The number of pages read from disk and currently not arrived
1679 * to the block.
1680 */
1681 Uint16 execSrPagesReading;
1682 /**
1683 * This variable refers to the new header file where we will
1684 * start writing the log after a system restart have been completed.
1685 */
1686 Uint16 headFileNo;
1687 /**
1688 * This variable refers to the page number within the header file.
1689 */
1690 Uint16 headPageNo;
1691 /**
1692 * This variable refers to the index within the new header
1693 * page.
1694 */
1695 Uint16 headPageIndex;
1696 /**
1697 * This variables indicates which was the last mbyte in the last
1698 * logfile before a system crash. Discovered during system restart.
1699 */
1700 Uint16 lastMbyte;
1701 /**
1702 * This variable is used only during execution of a log
1703 * record. It keeps track of in which file page a log
1704 * record was started. It is used if it is needed to write a
1705 * dirty page to disk during log execution (this happens when
1706 * commit records are invalidated).
1707 */
1708 Uint16 prevFilepage;
1709 /**
1710 * This is used to save where we were in the execution of log
1711 * records when we find a commit record that needs to be
1712 * executed.
1713 *
1714 * This variable is also used to remember the index where the
1715 * log type was in the log record. It is only used in this
1716 * role when finding a commit record that needs to be
1717 * invalidated.
1718 */
1719 Uint16 savePageIndex;
1720 Uint16 logTailMbyte;
1721 /**
1722 * The mbyte within the starting log file where to start
1723 * executing the log.
1724 */
1725 Uint16 startMbyte;
1726 /**
1727 * The last mbyte in which to execute the log during system
1728 * restart.
1729 */
1730 Uint16 stopMbyte;
1731 /**
1732 * This variable refers to the file where invalidation is
1733 * occurring during system/node restart.
1734 */
1735 Uint16 invalidateFileNo;
1736 /**
1737 * This variable refers to the page where invalidation is
1738 * occurring during system/node restart.
1739 */
1740 Uint16 invalidatePageNo;
1741 /**
1742 * For MT LQH the log part (0-3).
1743 */
1744 Uint16 logPartNo;
1745
1746 /**
1747 * Keep track of the first invalid log page found in our search. This
1748 * enables us to print information about irregular writes of log pages
1749 * at the end of the REDO log.
1750 */
1751 Uint16 endInvalidMByteSearch;
1752 Uint16 firstInvalidateFileNo;
1753 Uint16 firstInvalidatePageNo;
1754 bool firstInvalidatePageFound;
1755 /**
1756 * IO tracker...
1757 */
1758 struct IOTracker m_io_tracker;
1759
1760 RedoWorkStats m_redoWorkStats;
1761 }; // Size 164 Bytes
1762 typedef Ptr<LogPartRecord> LogPartRecordPtr;
1763
1764 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1765 /* $$$$$$$ LOG FILE RECORD $$$$$$$ */
1766 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1767 /* THIS RECORD IS ALIGNED TO BE 288 (256 + 32) BYTES. */
1768 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1769 /**
1770 * This record contains information about a log file.
1771 * A log file contains log records from several tables and
1772 * fragments of a table. LQH can contain more than
1773 * one log file to ensure faster log processing.
1774 *
1775 * The number of pages to write to disk at a time is
1776 * configurable.
1777 */
1778 struct LogFileRecord {
/* Empty constructor: performs no explicit member initialisation.
   NOTE(review): confirm where log file records get initialised. */
LogFileRecord() {}
1780
/* State of an ongoing change of log file (type of fileChangeState). */
enum FileChangeState
{
  NOT_ONGOING             = 0,
  BOTH_WRITES_ONGOING     = 1,
  LAST_WRITE_ONGOING      = 2,
  FIRST_WRITE_ONGOING     = 3,
  WRITE_PAGE_ZERO_ONGOING = 4,
  WAIT_FOR_OPEN_NEXT_FILE = 5,
  LAST_FILEWRITE_WAITS    = 6,
  FIRST_FILEWRITE_WAITS   = 7
};
/* Status of a log file record (type of logFileStatus). */
enum LogFileStatus
{
  LFS_IDLE = 0,                 ///< Log file record not in use
  CLOSED = 1,                   ///< Log file closed
  OPENING_INIT = 2,
  OPEN_SR_FRONTPAGE = 3,        ///< Log file opened as part of system
                                ///< restart. Open file 0 to find
                                ///< the front page of the log part.
  OPEN_SR_LAST_FILE = 4,        ///< Open last log file that was written
                                ///< before the system restart.
  OPEN_SR_NEXT_FILE = 5,        ///< Open a log file which is 16 files
                                ///< backwards to find the next
                                ///< information about GCPs.
  OPEN_EXEC_SR_START = 6,       ///< Log file opened as part of
                                ///< executing log during system restart.
  OPEN_EXEC_SR_NEW_MBYTE = 7,
  OPEN_SR_FOURTH_PHASE = 8,
  OPEN_SR_FOURTH_NEXT = 9,
  OPEN_SR_FOURTH_ZERO = 10,
  OPENING_WRITE_LOG = 11,       ///< Log file opened as part of writing
                                ///< log during normal operation.
  OPEN_EXEC_LOG = 12,
  CLOSING_INIT = 13,
  CLOSING_SR = 14,              ///< Log file closed as part of system
                                ///< restart. Currently trying to
                                ///< find where to start executing the
                                ///< log
  CLOSING_EXEC_SR = 15,         ///< Log file closed as part of
                                ///< executing log during system restart
  CLOSING_EXEC_SR_COMPLETED = 16,
  CLOSING_WRITE_LOG = 17,       ///< Log file closed as part of writing
                                ///< log during normal operation.
  CLOSING_EXEC_LOG = 18,
  OPEN_INIT = 19,
  OPEN = 20,                    ///< Log file open
  OPEN_SR_READ_INVALIDATE_PAGES = 21,
  CLOSE_SR_READ_INVALIDATE_PAGES = 22,
  OPEN_SR_WRITE_INVALIDATE_PAGES = 23,
  CLOSE_SR_WRITE_INVALIDATE_PAGES = 24,
  OPEN_SR_READ_INVALIDATE_SEARCH_FILES = 25,
  CLOSE_SR_READ_INVALIDATE_SEARCH_FILES = 26,
  CLOSE_SR_READ_INVALIDATE_SEARCH_LAST_FILE = 27
  /* Values 28 and 29 exist only when the redo open-file cache is compiled in. */
#ifndef NO_REDO_OPEN_FILE_CACHE
  , OPEN_EXEC_LOG_CACHED = 28
  , CLOSING_EXEC_LOG_CACHED = 29
#endif
  , CLOSING_SR_FRONTPAGE = 30
};
1839
1840 /**
1841 * When a new mbyte is started in the log we have to find out
1842 * how far back in the log we still have prepared operations
1843 * which have been neither committed or aborted. This variable
1844 * keeps track of this value for each of the mbytes in this
1845 * log file. This is used in writing down these values in the
1846 * header of each log file. That information is used during
1847 * system restart to find the tail of the log.
1848 */
1849 UintR *logLastPrepRef;
1850 /**
1851 * The max global checkpoint completed before the mbyte in the
1852 * log file was started. One variable per mbyte.
1853 */
1854 UintR *logMaxGciCompleted;
1855 /**
1856 * The max global checkpoint started before the mbyte in the log
1857 * file was started. One variable per mbyte.
1858 */
1859 UintR *logMaxGciStarted;
1860 /**
1861 * This variable contains the file name as needed by the file
1862 * system when opening the file.
1863 */
1864 UintR fileName[4];
1865 /**
1866 * This variable has a reference to the log page which is
1867 * currently in use by the log.
1868 */
1869 UintR currentLogpage;
1870 /**
1871 * The number of the current mbyte in the log file.
1872 */
1873 UintR currentMbyte;
1874 /**
1875 * This variable is used when changing files. It is to find
1876 * out when both the last write in the previous file and the
1877 * first write in this file has been completed. After these
1878 * writes have completed the variable keeps track of when the
1879 * write to page zero in file zero is completed.
1880 */
1881 FileChangeState fileChangeState;
1882 /**
1883 * The number of the file within this log part.
1884 */
1885 UintR fileNo;
1886 /**
1887 * This variable shows where to read/write the next pages into
1888 * the log. Used when writing the log during normal operation
1889 * and when reading the log during system restart. It
1890 * specifies the page position where each page is 8 kbyte.
1891 */
1892 UintR filePosition;
1893 /**
1894 * This contains the file pointer needed by the file system
1895 * when reading/writing/closing and synching.
1896 */
1897 UintR fileRef;
1898 /**
1899 * The head of the pages waiting for shipment to disk.
1900 * They are filled with log info.
1901 */
1902 UintR firstFilledPage;
1903 /**
1904 * A list of active read/write operations on the log file.
1905 * Operations are always put in last and the first should
1906 * always complete first.
1907 */
1908 UintR firstLfo;
1909 UintR lastLfo;
1910 /**
1911 * The tail of the pages waiting for shipment to disk.
1912 * They are filled with log info.
1913 */
1914 UintR lastFilledPage;
1915 /**
1916 * This variable keeps track of the last written page in the
1917 * file while writing page zero in file zero when changing log
1918 * file.
1919 */
1920 UintR lastPageWritten;
1921 /**
1922 * This variable keeps track of the last written word in the
1923 * last page written in the file while writing page zero in
1924 * file zero when changing log file.
1925 */
1926 UintR lastWordWritten;
1927 /**
1928 * This variable contains the status of this log file.
1929 */
1930 LogFileStatus logFileStatus;
1931 /**
1932 * A reference to page zero in this file.
1933 * This page is written before the file is closed.
1934 */
1935 UintR logPageZero;
1936 /**
1937 * This variable contains a reference to the record describing
1938 * this log part. One of four records (0,1,2 or 3).
1939 */
1940 UintR logPartRec;
1941 /**
1942 * Next free log file record or next log file in this log.
1943 */
1944 UintR nextLogFile;
1945 /**
1946 * The previous log file.
1947 */
1948 UintR prevLogFile;
1949 /**
1950 * The number of remaining words in this mbyte of the log file.
1951 */
1952 UintR remainingWordsInMbyte;
1953 /**
1954 * The current file page within the current log file. This is
1955 * a reference within the file and not a reference to a log
1956 * page record. It is used to deduce where log records are
1957 * written. Particularly completed gcp records and prepare log
1958 * records.
1959 */
1960 Uint16 currentFilepage;
1961 /**
1962 * The number of pages in the list referenced by
1963 * LOG_PAGE_BUFFER.
1964 */
1965 Uint16 noLogpagesInBuffer;
1966
1967 #ifndef NO_REDO_OPEN_FILE_CACHE
1968 Uint32 nextList;
1969 Uint32 prevList;
1970 #endif
1971 }; // Size 288 bytes
1972 typedef Ptr<LogFileRecord> LogFileRecordPtr;
1973 typedef ArrayPool<LogFileRecord> LogFileRecord_pool;
1974 typedef DLCFifoList<LogFileRecord_pool> LogFileRecord_fifo;
1975
1976 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1977 /* $$$$$$$ LOG OPERATION RECORD $$$$$$$ */
1978 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
1979 /**
1980 * This record contains a currently active file operation
1981 * that has been started by the log module.
1982 */
1983 struct LogFileOperationRecord {
/**
 * State of a log file operation. Each enumerator names the kind of
 * read or write that is outstanding against the log file.
 * NOTE(review): judging by the enumerator names and the existing
 * trailing comments, the values fall into file initialisation
 * (INIT_*, WRITE_GCI_ZERO, WRITE_INIT_MBYTE), reads and writes done
 * during system restart (READ_SR_*, READ_EXEC_*, *_SR_INVALIDATE_*),
 * writes around a change of log file and ordinary log writes.
 * Confirm against the code that switches on lfoState.
 */
1984 enum LfoState {
1985 IDLE = 0, ///< Operation is not used at the moment
1986 INIT_WRITE_AT_END = 1, ///< Write in file so that it grows to
1987 ///< 16 Mbyte
1988 INIT_FIRST_PAGE = 2, ///< Initialise the first page in a file
1989 WRITE_GCI_ZERO = 3,
1990 WRITE_INIT_MBYTE = 4,
1991 WRITE_DIRTY = 5,
1992 READ_SR_FRONTPAGE = 6, ///< Read page zero in file zero during
1993 ///< system restart
1994 READ_SR_LAST_FILE = 7, ///< Read page zero in last file open
1995 ///< before system crash
1996 READ_SR_NEXT_FILE = 8, ///< Read 60 files backwards to find
1997 ///< further information GCPs in page
1998 ///< zero
1999 READ_SR_LAST_MBYTE = 9,
2000 READ_EXEC_SR = 10,
2001 READ_EXEC_LOG = 11,
2002 READ_SR_FOURTH_PHASE = 12,
2003 READ_SR_FOURTH_ZERO = 13,
2004 FIRST_PAGE_WRITE_IN_LOGFILE = 14,
2005 LAST_WRITE_IN_FILE = 15,
2006 WRITE_PAGE_ZERO = 16,
2007 ACTIVE_WRITE_LOG = 17, ///< A write operation during
2008 ///< writing of log
2009 READ_SR_INVALIDATE_PAGES = 18,
2010 WRITE_SR_INVALIDATE_PAGES = 19,
2011 WRITE_SR_INVALIDATE_PAGES_UPDATE_PAGE0 = 20
2012 ,READ_SR_INVALIDATE_SEARCH_FILES = 21
2013 };
2014 /**
2015 * We have to remember the log pages read (at most 16 page
2016 * references per operation). Otherwise we cannot build the linked
2017 * list after the pages have arrived in main memory.
2018 */
2019 UintR logPageArray[16];
2020 /**
2021 * A list of the pages that are part of this active operation.
2022 */
2023 UintR firstLfoPage;
2024 /**
2025 * A timer to ensure that records are not lost.
2026 */
2027 UintR lfoTimer;
2028 /**
2029 * The word number of the last written word in the last page
2030 * during a file write.
2031 */
2032 UintR lfoWordWritten;
2033 /**
2034 * This variable contains the state of the log file operation.
2035 */
2036 LfoState lfoState;
2037 /**
2038 * The log file that the file operation affects.
2039 */
2040 UintR logFileRec;
2041 /**
2042 * The log file operations on a file are kept in a linked list.
2043 * This is the reference to the next operation in that list.
2044 */
2045 UintR nextLfo;
2046 /**
2047 * The page number of the first read/written page during a file
2048 * read/write.
2049 */
2050 Uint16 lfoPageNo;
2051 /**
2052 * The number of pages written or read during an operation to
2053 * the log file.
2054 */
2055 Uint16 noPagesRw;
2055 }; // 92 bytes
2056 typedef Ptr<LogFileOperationRecord> LogFileOperationRecordPtr;
2057
2058 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
2059 /* $$$$$$$ LOG PAGE RECORD $$$$$$$ */
2060 /* $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ */
2061 /**
2062 * These are the 8 k pages used to store log records before storing
2063 * them in the file system.
2064 * Since 64 kbyte is sent to disk at a time it is necessary to have
2065 * at least 4*64 kbytes of log pages.
2066 * To handle multiple outstanding requests we need some additional pages.
2067 * Thus we allocate 1 mbyte to ensure that we do not get problems with
2068 * insufficient number of pages.
2069 */
2070 struct LogPageRecord {
2071 /**
2072 * This variable contains the pages that are sent to disk.
2073 *
2074 * All pages contain a header of 12 words:
2075 * - WORD 0: CHECKSUM Calculated before storing on disk and
2076 * checked when read from disk.
2077 * - WORD 1: LAP How many wraparounds have the log
2078 * experienced since initial start of the
2079 * system.
2080 * - WORD 2: MAX_GCI_COMPLETED Which is the maximum gci which have
2081 * completed before this page. This
2082 * gci will not be found in this
2083 * page and hereafter in the log.
2084 * - WORD 3: MAX_GCI_STARTED The maximum gci which have started
2085 * before this page.
2086 * - WORD 4: NEXT_PAGE Pointer to the next page.
2087 * Only used in main memory
2088 * - WORD 5: PREVIOUS_PAGE Pointer to the previous page.
2089 * Currently not used.
2090 * - WORD 6: VERSION NDB version that wrote the page.
2091 * - WORD 7: NO_LOG_FILES Number of log files in this log part.
2092 * - WORD 8: CURRENT PAGE INDEX This keeps track of where we are in the
2093 * page.
2094 * This is only used when the page is in
2095 * memory.
2096 * - WORD 9: OLD PREPARE FILE NO This keeps track of the oldest prepare
2097 * operation still alive (not committed
2098 * or aborted) when this mbyte started.
2099 * - WORD 10: OLD PREPARE PAGE REF File page reference within this file
2100 * number.
2101 * Page no + Page index.
2102 * If no prepare was alive then these
2103 * values point to this mbyte.
2104 * - WORD 11: DIRTY FLAG = 0 means not dirty and
2105 * = 1 means the page is dirty.
2106 * Is used when executing log when
2107 * a need to write invalid commit
2108 * records arise.
2109 *
2110 * The remaining 2036 words are used for log information, i.e.
2111 * log records.
 * NOTE(review): 12 header words + 2036 words = 2048 words, whereas
 * logPageWord below is declared with 8192 words. The word count in
 * this comment looks stale relative to the array size; confirm.
2112 *
2113 * A log record on this page has the following layout:
2114 * - WORD 0: LOG RECORD TYPE
2115 * The following types are supported:
2116 * - PREPARE OPERATION An operation not yet committed.
2117 * - NEW PREPARE OPERATION A prepared operation already
2118 * logged is inserted
2119 * into the log again so that the
2120 * log tail can be advanced.
2121 * This can happen when a transaction is
2122 * committed for a long time.
2123 * - ABORT TRANSACTION A previously prepared transaction
2124 * was aborted.
2125 * - COMMIT TRANSACTION A previously prepared transaction
2126 * was committed.
2127 * - INVALID COMMIT A previous commit record was
2128 * invalidated by a
2129 * subsequent system restart.
2130 * A log record must be invalidated
2131 * in a system restart if it belongs
2132 * to a global checkpoint id which
2133 * is not included in the system
2134 * restart.
2135 * Otherwise it will be included in
2136 * a subsequent system restart since
2137 * it will then most likely belong
2138 * to a global checkpoint id which
2139 * is part of that system
2140 * restart.
2141 * This is not a correct behaviour
2142 * since this operation is lost in a
2143 * system restart and should not
2144 * reappear at a later system
2145 * restart.
2146 * - COMPLETED GCI A GCI has now been completed.
2147 * - FRAGMENT SPLIT A fragment has been split
2148 * (not implemented yet)
2149 * - FILE DESCRIPTOR This is always the first log record
2150 * in a file.
2151 * It is always placed on page 0 after
2152 * the header.
2153 * It is written when the file is
2154 * opened and when the file is closed.
2155 * - NEXT LOG RECORD This log record only records where
2156 * the next log record starts.
2157 * - NEXT MBYTE RECORD This log record specifies that there
2158 * are no more log records in this mbyte.
2159 *
2160 *
2161 * A FILE DESCRIPTOR log record continues as follows:
2162 * - WORD 1: NO_LOG_DESCRIPTORS This defines the number of
2163 * descriptors of log files that
2164 * will follow hereafter (max 32).
2165 * The log descriptor will describe
2166 * information about
2167 * max_gci_completed,
2168 * max_gci_started and log_lap at
2169 * every 1 mbyte of the log file.
2170 * Since a log file is 16 mbyte
2171 * always, we need 16 entries in the
2172 * array with max_gci_completed,
2173 * max_gci_started and log_lap. Thus
2174 * 32 entries per log file
2175 * descriptor (max 32*48 = 1536,
2176 * always fits in page 0).
2177 * - WORD 2: LAST LOG FILE The number of the log file currently
2178 * open. This is only valid in file 0.
2179 * - WORD 3 - WORD 18: MAX_GCI_COMPLETED for every 1 mbyte
2180 * in this log file.
2181 * - WORD 19 - WORD 34: MAX_GCI_STARTED for every 1 mbyte
2182 * in this log file.
2183 *
2184 * Then it continues for NO_LOG_DESCRIPTORS until all subsequent
2185 * log files (max 32) have been properly described.
2186 *
2187 *
2188 * A PREPARE OPERATION log record continues as follows:
2189 * - WORD 1: LOG RECORD SIZE
2190 * - WORD 2: HASH VALUE
2191 * - WORD 3: SCHEMA VERSION
2192 * - WORD 4: OPERATION TYPE
2193 * = 0 READ,
2194 * = 1 UPDATE,
2195 * = 2 INSERT,
2196 * = 3 DELETE
2197 * - WORD 5: NUMBER OF WORDS IN ATTRINFO PART
2198 * - WORD 6: KEY LENGTH IN WORDS
2199 * - WORD 7 - (WORD 7 + KEY_LENGTH - 1) The tuple key
2200 * - (WORD 7 + KEY_LENGTH) -
2201 * (WORD 7 + KEY_LENGTH + ATTRINFO_LENGTH - 1) The attrinfo
2202 *
2203 * A log record can be spread in several pages in some cases.
2204 * The next log record always starts immediately after this log record.
2205 * A log record does however never traverse a 1 mbyte boundary.
2206 * This is used to ensure that we can always come back if something
2207 * strange occurs in the log file.
2208 * To ensure this we also have log records which only records
2209 * the next log record.
2210 *
2211 *
2212 * A COMMIT TRANSACTION log record continues as follows:
2213 * - WORD 1: TRANSACTION ID PART 1
2214 * - WORD 2: TRANSACTION ID PART 2
2215 * - WORD 3: FRAGMENT ID OF THE OPERATION
2216 * - WORD 4: TABLE ID OF THE OPERATION
2217 * - WORD 5: THE FILE NUMBER OF THE PREPARE RECORD
2218 * - WORD 6: THE STARTING PAGE NUMBER OF THE PREPARE RECORD
2219 * - WORD 7: THE STARTING PAGE INDEX OF THE PREPARE RECORD
2220 * - WORD 8: THE STOP PAGE NUMBER OF THE PREPARE RECORD
2221 * - WORD 9: GLOBAL CHECKPOINT OF THE TRANSACTION
2222 *
2223 *
2224 * An ABORT TRANSACTION log record continues as follows:
2225 * - WORD 1: TRANSACTION ID PART 1
2226 * - WORD 2: TRANSACTION ID PART 2
2227 *
2228 *
2229 * A COMPLETED GCI log record continues as follows:
2230 * - WORD 1: THE COMPLETED GCI
2231 *
2232 *
2233 * A NEXT LOG RECORD log record continues as follows:
2234 * - There is no more information needed.
2235 * The next log record will always refer to the start of the next page.
2236 *
2237 * A NEXT MBYTE RECORD log record continues as follows:
2238 * - There is no more information needed.
2239 * The next mbyte will always refer to the start of the next mbyte.
2240 */
2241 UintR logPageWord[8192]; // Size 32 kbytes
2242 };
2243 typedef Ptr<LogPageRecord> LogPageRecordPtr;
2244
/**
 * Record holding up to eight page references together with links to
 * a next and a previous record and a file number / page number pair.
 * NOTE(review): presumably prNext/prPrev are record indexes forming
 * a doubly linked list and prFileNo/prPageNo identify the position in
 * the log that pageRef[] belongs to; confirm against the users.
 */
2245 struct PageRefRecord {
2246 UintR pageRef[8]; // the page references kept by this record
2247 UintR prNext; // link to the next PageRefRecord
2248 UintR prPrev; // link to the previous PageRefRecord
2249 Uint16 prFileNo; // file number
2250 Uint16 prPageNo; // page number
2251 }; // size 44 bytes
2252 typedef Ptr<PageRefRecord> PageRefRecordPtr;
2253
/**
 * Table record: status, type and schema version of a table plus the
 * fragments of the table (at most MAX_FRAG_PER_LQH entries) and usage
 * counters for readers and writers.
 */
2254 struct Tablerec {
/**
 * Status of the table. Covers the defined / not defined states, an
 * ongoing add, the steps of dropping a table and a read-only state.
 */
2255 enum TableStatus {
2256 TABLE_DEFINED = 0,
2257 NOT_DEFINED = 1,
2258 ADD_TABLE_ONGOING = 2,
2259 PREP_DROP_TABLE_DONE = 3,
2260 DROP_TABLE_WAIT_USAGE = 4,
2261 DROP_TABLE_WAIT_DONE = 5,
2262 DROP_TABLE_ACC = 6,
2263 DROP_TABLE_TUP = 7,
2264 DROP_TABLE_TUX = 8
2265 ,TABLE_READ_ONLY = 9
2266 };
2267
/**
 * Parallel arrays with one slot per fragment of the table.
 * NOTE(review): presumably fragrec[i] is the fragment record
 * reference for the fragment whose id is fragid[i]; confirm.
 */
2268 UintR fragrec[MAX_FRAG_PER_LQH];
2269 Uint16 fragid[MAX_FRAG_PER_LQH];
2270 /**
2271 * Status of the table
2272 */
2273 TableStatus tableStatus;
2274 /**
2275 * Table type and target table of index.
2276 */
2277 Uint16 tableType;
2278 Uint16 primaryTableId;
2279 Uint32 schemaVersion;
2280 Uint8 m_disk_table;
2281 bool m_informed_backup_drop_tab;
2282
2283 Uint32 usageCountR; // readers
2284 Uint32 usageCountW; // writers
2285 Uint32 m_addfragptr_i;
2286 Uint32 m_senderData;
2287 Uint32 m_senderRef;
2288 }; // Size 100 bytes (NOTE(review): actual size depends on MAX_FRAG_PER_LQH; figure not re-checked)
2289 typedef Ptr<Tablerec> TablerecPtr;
2290 #endif // DBLQH_STATE_EXTRACT
/**
 * Connection record for an operation handled by this block.
 *
 * The nested state enums (LogWriteState, AbortState, TransactionState,
 * ConnectState) are always declared. The data members, constructor and
 * destructor are compiled only when DBLQH_STATE_EXTRACT is not defined.
 *
 * The constructor gives a defined start value only to the members in
 * its initialiser list; the comments interleaved in that list name the
 * members that are deliberately left unset and must be assigned before
 * they are read.
 */
2291 struct TcConnectionrec {
/** Progress of writing the operation to the log. */
2292 enum LogWriteState {
2293 NOT_STARTED = 0,
2294 NOT_WRITTEN = 1,
2295 NOT_WRITTEN_WAIT = 2,
2296 WRITTEN = 3
2297 };
/** Abort handling state; ABORT_IDLE is the constructor default. */
2298 enum AbortState {
2299 ABORT_IDLE = 0,
2300 ABORT_ACTIVE = 1,
2301 NEW_FROM_TC = 2,
2302 REQ_FROM_TC = 3,
2303 ABORT_FROM_TC = 4,
2304 ABORT_FROM_LQH = 5
2305 };
/**
 * State of the transaction/operation. The numeric values are not
 * contiguous (5, 10, 16 and 22-29 are unused) and WAIT_TUP_COMMIT
 * (35) is listed with the commit states although it has the highest
 * value. TC_NOT_CONNECTED is the constructor default.
 */
2306 enum TransactionState {
2307 IDLE = 0,
2308
2309 /* -------------------------------------------------------------------- */
2310 // Transaction in progress states
2311 /* -------------------------------------------------------------------- */
2312 WAIT_ACC = 1,
2313 WAIT_TUPKEYINFO = 2,
2314 WAIT_ATTR = 3,
2315 WAIT_TUP = 4,
2316 LOG_QUEUED = 6,
2317 PREPARED = 7,
2318 LOG_COMMIT_WRITTEN_WAIT_SIGNAL = 8,
2319 LOG_COMMIT_QUEUED_WAIT_SIGNAL = 9,
2320
2321 /* -------------------------------------------------------------------- */
2322 // Commit in progress states
2323 /* -------------------------------------------------------------------- */
2324 LOG_COMMIT_QUEUED = 11,
2325 COMMIT_QUEUED = 12,
2326 COMMITTED = 13,
2327 WAIT_TUP_COMMIT= 35,
2328
2329 /* -------------------------------------------------------------------- */
2330 // Abort in progress states
2331 /* -------------------------------------------------------------------- */
2332 WAIT_ACC_ABORT = 14,
2333 ABORT_QUEUED = 15,
2334 WAIT_AI_AFTER_ABORT = 17,
2335 LOG_ABORT_QUEUED = 18,
2336 WAIT_TUP_TO_ABORT = 19,
2337
2338 /* -------------------------------------------------------------------- */
2339 // Scan in progress states
2340 /* -------------------------------------------------------------------- */
2341 WAIT_SCAN_AI = 20,
2342 SCAN_STATE_USED = 21,
2343 SCAN_TUPKEY = 30,
2344 COPY_TUPKEY = 31,
2345
2346 TC_NOT_CONNECTED = 32,
2347 PREPARED_RECEIVED_COMMIT = 33, // Temporary state in write commit log
2348 LOG_COMMIT_WRITTEN = 34 // Temporary state in write commit log
2349 };
/** Connection state; CONNECTED is the constructor default. */
2350 enum ConnectState {
2351 DISCONNECTED = 0,
2352 CONNECTED = 1,
2353 COPY_CONNECTED = 2,
2354 LOG_CONNECTED = 3
2355 };
2356 #ifndef DBLQH_STATE_EXTRACT
2357 STATIC_CONST( TYPE_ID = RT_DBLQH_TC_CONNECT);
2358 Uint32 m_magic;
2359
/**
 * Constructor. Sets m_magic from TYPE_ID, sets record references to
 * RNIL, block references and tcScanInfo to ~0, counters and lengths
 * to 0, and the state members to CONNECTED / ABORT_IDLE /
 * NOT_STARTED / TC_NOT_CONNECTED. The initialisers follow the
 * declaration order of the members.
 * Members that are neither initialised nor mentioned in a comment
 * below: accConnectPtrP, tupConnectPtrP, m_dealloc_data, accOpPtr.
 */
TcConnectionrecDblqh::TcConnectionrec2360 TcConnectionrec() :
2361 m_magic(Magic::make(TYPE_ID)),
2362 accConnectrec(RNIL),
2363 tupConnectrec(RNIL),
2364 nextTcConnectrec(RNIL),
2365 connectState(CONNECTED),
2366 keyInfoIVal(RNIL),
2367 attrInfoIVal(RNIL),
2368 //transid has no special value indicating not set
2369 abortState(ABORT_IDLE),
2370 applOprec(RNIL),
2371 clientConnectrec(RNIL),
2372 //tcTimer must be set before used
2373 currReclenAi(0),
2374 currTupAiLen(0),
2375 //fragmentid must be set in all paths before use
2376 //fragmentptr must be set in all paths before use
2377 gci_hi(0),
2378 gci_lo(0),
2379 //hashValue has no special value indicating not set
2380 logWriteState(NOT_STARTED),
2381 nextHashRec(RNIL),
2382 nextLogTcrec(RNIL),
2383 nextTcLogQueue(RNIL),
2384 prevHashRec(RNIL),
2385 prevLogTcrec(RNIL),
2386 prevTcLogQueue(RNIL),
2387 //readlenAi must be set before used
2388 //reqinfo must be set before used
2389 //schemaVersion must be set before used
2390 //tableref must be set before used
2391 tcOprec(RNIL),
2392 hashIndex(RNIL),
2393 //tcHashKeyHi must be set before used
2394 tcScanInfo(Uint32(~0)),
2395 tcScanRec(RNIL),
2396 totReclenAi(0),
2397 totSendlenAi(0),
2398 savePointId(0),
2399 transactionState(TC_NOT_CONNECTED),
2400 applRef(Uint32(~0)),
2401 clientBlockref(Uint32(~0)),
2402 //tcBlockref must be set before used
2403 commitAckMarker(RNIL),
2404 numFiredTriggers(0),
2405 lqhKeyReqId(0),
2406 //errorCode must be set before used
2407 //nextReplica must be set before used
2408 primKeyLen(0),
2409 //save1 only used with short signals, must be set before used
2410 //nodeAfterNext must be set before used
2411 //activeCreat must be set before used
2412 //dirtyOp must be set before used
2413 //indTakeOver must be set before used
2414 //lastReplicaNo must be set before used
2415 //lockType only used for key operations, must be set before used
2416 nextSeqNoReplica(0),
2417 //opSimple only used for key operations, must be set before used
2418 //opExec must be set before used
2419 //operation must be set before used
2420 //m_reorg must be set before used
2421 //reclenAiLqhkey used for short signals, must be set before used
2422 //replicaType only used for write key operations, must be set before used
2423 seqNoReplica(0),
2424 //tcNodeFailrec only set when abortState is set to NEW_FROM_TC
2425 //m_disk_table set before used
2426 //m_use_rowid used for key operations, set before used
2427 //m_dealloc_state must be set before used
2428 //m_fire_trig_pass must be set before used
2429 m_committed_log_space(0),
2430 m_flags(0),
2431 //m_log_part_ptr_i set before used, only key ops
2432 //m_row_id set before used
2433
2434 //logStartFileNo must be set before used
2435 //logStartPageIndex must be set before used
2436 //logStartPageNo must be set before used
2437 //logStopPageNo must be set before used
2438
2439 //copyCountWords must be set before used
2440 reqRef(Uint32(~0)),
2441 reqBlockref(Uint32(~0))
2442 //m_corrFactorLo must be set before used
2443 //m_corrFactorHi must be set before used
2444 //scanKeyInfoPos only used when m_flags has OP_SCANKEYINFOPOSSAVED set
2445 //m_nr_delete only used in Copy fragment, set before used
2446 {
2447 }
2448
/** Destructor; intentionally empty. */
~TcConnectionrecDblqh::TcConnectionrec2449 ~TcConnectionrec()
2450 {
2451 }
// References (and cached pointers) to the matching operation
// records in DBACC and DBTUP.
2452 UintR accConnectrec;
2453 UintR tupConnectrec;
2454 Uint32 nextTcConnectrec;
2455 Dbacc::Operationrec *accConnectPtrP;
2456 Dbtup::Operationrec *tupConnectPtrP;
2457 ConnectState connectState;
2458 Uint32 keyInfoIVal;
2459 Uint32 attrInfoIVal;
2460 UintR transid[2];
2461 AbortState abortState;
2462 UintR applOprec;
2463 UintR clientConnectrec;
2464 UintR tcTimer;
2465 UintR currReclenAi;
2466 UintR currTupAiLen;
2467 UintR fragmentid;
2468 UintR fragmentptr;
2469 UintR gci_hi;
2470 UintR gci_lo;
2471 UintR hashValue;
2472
2473 LogWriteState logWriteState;
// Links for the hash chain, the log TC record list and the TC log
// queue (each has a next and a prev reference).
2474 UintR nextHashRec;
2475 UintR nextLogTcrec;
2476 UintR nextTcLogQueue;
2477 UintR prevHashRec;
2478 UintR prevLogTcrec;
2479 UintR prevTcLogQueue;
2480 UintR readlenAi;
2481 UintR reqinfo;
2482 UintR schemaVersion;
2483 UintR tableref;
2484 UintR tcOprec;
2485 UintR hashIndex;
2486 Uint32 tcHashKeyHi;
2487 UintR tcScanInfo;
2488 UintR tcScanRec;
2489 UintR totReclenAi;
2490 UintR totSendlenAi;
2491 UintR savePointId;
2492 TransactionState transactionState;
2493 BlockReference applRef;
2494 BlockReference clientBlockref;
2495
2496 BlockReference tcBlockref;
2497 Uint32 commitAckMarker;
// m_scan_curr_range_no and numFiredTriggers share storage; the
// constructor initialises the union through numFiredTriggers.
2498 union {
2499 Uint32 m_scan_curr_range_no;
2500 UintR numFiredTriggers;
2501 };
2502 Uint64 lqhKeyReqId;
2503 Uint16 errorCode;
2504 Uint16 nextReplica;
2505 Uint16 primKeyLen;
2506 Uint16 save1;
2507 Uint16 nodeAfterNext[3];
2508
2509 Uint8 activeCreat;
2510 Uint8 dirtyOp;
2511 Uint8 indTakeOver;
2512 Uint8 lastReplicaNo;
2513 Uint8 lockType;
2514 Uint8 nextSeqNoReplica;
2515 Uint8 opSimple;
2516 Uint8 opExec;
2517 Uint8 operation;
2518 Uint8 m_reorg;
2519 Uint8 reclenAiLqhkey;
2520 Uint8 replicaType;
2521 Uint8 seqNoReplica;
2522 Uint8 tcNodeFailrec;
2523 Uint8 m_disk_table;
2524 Uint8 m_use_rowid;
/**
 * Deallocation state of the operation, stored in m_dealloc_state.
 * The state selects which member of m_dealloc_data is meaningful.
 */
2525 enum dealloc_states {
2526 /*
2527 * Example set of dealloc ops:
2528 *
2529 * Counting op (C)
2530 * m_dealloc_state = DA_DEALLOC_COUNT
2531 * m_dealloc_ref_count= 4
2532 *
2533 * Other op (A)
2534 * m_dealloc_state = DA_DEALLOC_REFERENCE
2535 * m_dealloc_op_id = C
2536 *
2537 * Other op (B)
2538 * m_dealloc_state = DA_DEALLOC_REFERENCE
2539 * m_dealloc_op_id = C
2540 *
2541 * Other op (D)
2542 * m_dealloc_state = DA_DEALLOC_REFERENCE
2543 * m_dealloc_op_id = C
2544 */
2545 DA_IDLE, // No deallocation
2546 DA_DEALLOC_COUNT, // Counting op live, counting references
2547 DA_DEALLOC_COUNT_ZOMBIE, // Counting op zombie, counting references
2548 DA_DEALLOC_REFERENCE // !Counting op, refers to counting op
2549 };
2550 Uint8 m_dealloc_state;
2551 Uint8 m_fire_trig_pass;
2552 Uint8 m_committed_log_space;
/** Bit flags stored in m_flags; each enumerator is a distinct bit. */
2553 enum op_flags {
2554 OP_ISLONGREQ = 0x1,
2555 OP_SAVEATTRINFO = 0x2,
2556 OP_SCANKEYINFOPOSSAVED = 0x4,
2557 OP_DEFERRED_CONSTRAINTS = 0x8,
2558 OP_NORMAL_PROTOCOL = 0x10,
2559 OP_DISABLE_FK = 0x20,
2560 OP_NO_TRIGGERS = 0x40,
2561 OP_NOWAIT = 0x80
2562 };
2563 Uint32 m_flags;
2564 Uint32 m_log_part_ptr_i;
2565 union {
2566 // op count, m_dealloc_state = DA_DEALLOC_COUNT[_ZOMBIE]
2567 Uint32 m_dealloc_ref_count;
2568 // reference to counting op, m_dealloc_state = DA_DEALLOC_REFERENCE
2569 Uint32 m_dealloc_op_id;
2570 // unused, m_dealloc_state = DA_IDLE
2571 Uint32 m_unused;
2572 } m_dealloc_data;
2573 Local_key m_row_id;
2574
2575 /**
2576 * Variables only used for Write operations.
2577 */
2578 Uint16 logStartFileNo;
2579 Uint16 logStartPageIndex;
2580 Uint16 logStartPageNo;
2581 Uint16 logStopPageNo;
2582
2583 /**
2584 * Variables used in special cases like Copy Fragment, special
2585 * scan variants and special commit and abort situations.
2586 */
2587 UintR copyCountWords;
2588 UintR reqRef;
2589 BlockReference reqBlockref;
2590 Uint32 m_corrFactorLo; // For result correlation for linked operations.
2591 Uint32 m_corrFactorHi;
2592 SectionReader::PosInfo scanKeyInfoPos;
2593
2594 struct {
2595 Uint32 m_cnt;
2596 Uint32 m_page_id[2];
2597 Local_key m_disk_ref[2];
2598 } m_nr_delete;
2599 Uint32 accOpPtr; /* for scan lock take over */
2600 #endif // DBLQH_STATE_EXTRACT
2601 }; /* p2c: size = 308 bytes (NOTE(review): figure not re-checked against the current member list) */
2602
2603 #ifndef DBLQH_STATE_EXTRACT
// TcConnectionrec storage: the records live in a TransientPool
// (tcConnect_pool); m_tc_connect_ptr is a Ptr<TcConnectionrec> member
// of the block.
// NOTE(review): the exact roles of the reserved/free counters and of
// cfirstfreeTcConrec are not visible here; the notes below are
// inferred from the names only.
2604 STATIC_CONST(DBLQH_OPERATION_RECORD_TRANSIENT_POOL_INDEX = 0);
2605 Uint32 ctcConnectReservedCount;
2606 Uint32 ctcConnectReserved;
2607 typedef Ptr<TcConnectionrec> TcConnectionrecPtr;
2608 typedef TransientPool<TcConnectionrec> TcConnectionrec_pool;
2609 TcConnectionrec_pool tcConnect_pool;
2610 TcConnectionrecPtr m_tc_connect_ptr;
2611 UintR cfirstfreeTcConrec; // presumably head of a free list of records
2612 Uint32 ctcNumFree; // presumably number of free records
2613
/**
 * Record with the state kept for a failed TC node.
 * NOTE(review): the member names suggest it tracks the previous and
 * the new TC reference/block reference, the instance currently being
 * taken over and the node id of the failed node; confirm against the
 * take-over code.
 */
2614 struct TcNodeFailRecord {
/** Status values for tcFailStatus. */
2615 enum TcFailStatus {
2616 TC_STATE_TRUE = 0,
2617 TC_STATE_FALSE = 1,
2618 TC_STATE_BREAK = 2
2619 };
2620 UintR lastNewTcRef;
2621 UintR newTcRef;
2622 TcFailStatus tcFailStatus;
2623 UintR tcRecNow;
2624 BlockReference lastNewTcBlockref;
2625 BlockReference newTcBlockref;
2626 Uint32 lastTakeOverInstanceId;
2627 Uint32 takeOverInstanceId;
2628 Uint32 maxInstanceId;
2629 Uint16 oldNodeId;
2630 };
2631 typedef Ptr<TcNodeFailRecord> TcNodeFailRecordPtr;
2632
/**
 * Position of a record in the log: file number, start page number,
 * start page index and stop page number.
 * NOTE(review): presumably these describe the prepare record that a
 * commit log record refers to; confirm against the code that fills
 * this structure.
 */
2633 struct CommitLogRecord {
2634 Uint32 startPageNo; // page number where the record starts
2635 Uint32 startPageIndex; // index within the start page
2636 Uint32 stopPageNo; // page number where the record stops
2637 Uint32 fileNo; // file number
2638 };
2639 // Statistics about redo log initialization: total amounts and the
// amounts initialised so far, counted in log files and in mbytes.
2640 Uint32 totalLogFiles;
2641 Uint32 logFileInitDone;
2642 Uint32 totallogMBytes;
2643 Uint32 logMBytesInitDone;
2644
// NOTE(review): presumably how often start-up progress is reported
// and when the last report was made; units are not visible here.
2645 Uint32 m_startup_report_frequency;
2646 NDB_TICKS m_last_report_time;
2647
/**
 * Data kept by the block about the local sysfile. The single instance
 * is the member c_local_sysfile declared together with the struct.
 * The constructor body is empty, so none of the members are
 * initialised by it.
 */
2648 struct LocalSysfileStruct
2649 {
LocalSysfileStructDblqh::LocalSysfileStruct2650 LocalSysfileStruct() {}
2651 Uint32 m_node_restorable_on_its_own;
2652 Uint32 m_max_gci_restorable;
2653 Uint32 m_dihPtr;
2654 Uint32 m_dihRef;
2655 Uint32 m_save_gci;
2656 } c_local_sysfile;
// Local sysfile handling. Only the prototypes are visible here; the
// definitions live elsewhere.
// NOTE(review): judging by the names, each write_local_sysfile_*
// function starts a write for one event (restore complete, GCP
// complete, restart complete) and the matching *_done function runs
// when that write has finished; confirm in the implementation.
2657 void send_read_local_sysfile(Signal*);
2658 void write_local_sysfile_restore_complete(Signal*);
2659 void write_local_sysfile_gcp_complete(Signal *signal, Uint32 gci);
2660 void write_local_sysfile_gcp_complete_late(Signal *signal, Uint32 gci);
2661 void write_local_sysfile_restart_complete(Signal*);
2662 void write_local_sysfile_restore_complete_done(Signal*);
2663 void write_local_sysfile_gcp_complete_done(Signal *signal);
2664
2665 void write_local_sysfile_restart_complete_done(Signal*);
2666
2667 void write_local_sysfile(Signal*, Uint32, Uint32);
2668 void sendLCP_FRAG_ORD(Signal*, Uint32 fragPtrI);
2669
// Public interface of the block: constructor, virtual destructor and
// the member functions declared public so that they can be called
// from outside this class.
2670 public:
// instanceNumber defaults to 0.
2671 Dblqh(Block_context& ctx, Uint32 instanceNumber = 0);
2672 virtual ~Dblqh();
2673
2674 void execLOCAL_LATEST_LCP_ID_REP(Signal*);
2675 void execTUPKEYCONF(Signal* signal);
2676 Uint32 get_scan_api_op_ptr(Uint32 scan_ptr_i);
2677
2678 Uint32 rt_break_is_scan_prioritised(Uint32 scan_ptr_i);
2679 Uint32 getCreateSchemaVersion(Uint32 tableId);
2680
2681 void execNEXT_SCANCONF(Signal* signal);
// Pointer set-up helpers, taking a scan record index, a
// TcConnectionrecPtr or a TC connect record index respectively.
2682 void setup_scan_pointers(Uint32 scanPtrI);
2683 void setup_scan_pointers_from_tc_con(TcConnectionrecPtr);
2684 void setup_key_pointers(Uint32 tcIndex);
2685 void exec_next_scan_conf(Signal *signal);
2686 void exec_next_scan_ref(Signal *signal);
2687 void continue_next_scan_conf(Signal *signal,
2688 ScanRecord::ScanState scanState,
2689 ScanRecord * const scanPtr);
2690 private:
2691
2692 BLOCK_DEFINES(Dblqh);
2693
is_scan_from_backup_block(BlockReference resultRef)2694 bool is_scan_from_backup_block(BlockReference resultRef)
2695 {
2696 NodeId nodeId = refToNode(resultRef);
2697 Uint32 block = refToMain(resultRef);
2698 if (nodeId != getOwnNodeId())
2699 return false;
2700 if (block == BACKUP)
2701 return true;
2702 return false;
2703 }
is_prioritised_scan(BlockReference resultRef)2704 bool is_prioritised_scan(BlockReference resultRef)
2705 {
2706 /**
2707 * Scans that return data within the same thread to the
2708 * BACKUP and DBLQH block are always prioritised (LCP
2709 * scans, Backup scans and node recovery scans.
2710 */
2711 NodeId nodeId = refToNode(resultRef);
2712 Uint32 block = refToMain(resultRef);
2713 if (nodeId != getOwnNodeId())
2714 return false;
2715 if (block == BACKUP ||
2716 block == DBLQH)
2717 return true;
2718 return false;
2719 }
2720
2721 void execPACKED_SIGNAL(Signal* signal);
2722 void execDEBUG_SIG(Signal* signal);
2723 void execATTRINFO(Signal* signal);
2724 void execKEYINFO(Signal* signal);
2725 void execLQHKEYREQ(Signal* signal);
2726 void execLQHKEYREF(Signal* signal);
2727 void execCOMMIT(Signal* signal);
2728 void execCOMPLETE(Signal* signal);
2729 void execLQHKEYCONF(Signal* signal);
2730 void execTESTSIG(Signal* signal);
2731 void execLQH_RESTART_OP(Signal* signal);
2732 void execCONTINUEB(Signal* signal);
2733 void execSTART_RECREQ(Signal* signal);
2734 void execSTART_RECCONF(Signal* signal);
2735 void execEXEC_FRAGREQ(Signal* signal);
2736 void execEXEC_FRAGCONF(Signal* signal);
2737 void execEXEC_FRAGREF(Signal* signal);
2738 void execSTART_EXEC_SR(Signal* signal);
2739 void execEXEC_SRREQ(Signal* signal);
2740 void execEXEC_SRCONF(Signal* signal);
2741 void execREAD_PSEUDO_REQ(Signal* signal);
2742 void execSIGNAL_DROPPED_REP(Signal* signal);
2743
2744 void execDBINFO_SCANREQ(Signal* signal);
2745 void execDUMP_STATE_ORD(Signal* signal);
2746 void execACC_ABORTCONF(Signal* signal);
2747 void execNODE_FAILREP(Signal* signal);
2748 void execCHECK_LCP_STOP(Signal* signal);
2749 void execSEND_PACKED(Signal* signal);
2750 void execTUP_ATTRINFO(Signal* signal);
2751 void execREAD_CONFIG_REQ(Signal* signal);
2752
2753 void execCREATE_TAB_REQ(Signal* signal);
2754 void execCREATE_TAB_REF(Signal* signal);
2755 void execCREATE_TAB_CONF(Signal* signal);
2756 void execLQHADDATTREQ(Signal* signal);
2757 void execTUP_ADD_ATTCONF(Signal* signal);
2758 void execTUP_ADD_ATTRREF(Signal* signal);
2759
2760 void execLQHFRAGREQ(Signal* signal);
2761 void execACCFRAGCONF(Signal* signal);
2762 void execACCFRAGREF(Signal* signal);
2763 void execTUPFRAGCONF(Signal* signal);
2764 void execTUPFRAGREF(Signal* signal);
2765
2766 void execDROP_FRAG_REQ(Signal*);
2767 void execDROP_FRAG_REF(Signal*);
2768 void execDROP_FRAG_CONF(Signal*);
2769
2770 void insert_new_fragments_into_lcp(Signal*);
2771 void execWAIT_LCP_IDLE_CONF(Signal*);
2772 void execTAB_COMMITREQ(Signal* signal);
2773 void execACCSEIZECONF(Signal* signal);
2774 void execACCSEIZEREF(Signal* signal);
2775 void execREAD_NODESCONF(Signal* signal);
2776 void execREAD_NODESREF(Signal* signal);
2777 void execSTTOR(Signal* signal);
2778 void execNDB_STTOR(Signal* signal);
2779 void execTUPSEIZECONF(Signal* signal);
2780 void execTUPSEIZEREF(Signal* signal);
2781 void execACCKEYCONF(Signal* signal);
2782 void execACCKEYREF(Signal* signal);
2783 void execTUPKEYREF(Signal* signal);
2784 void execABORT(Signal* signal);
2785 void execABORTREQ(Signal* signal);
2786 void execCOMMITREQ(Signal* signal);
2787 void execCOMPLETEREQ(Signal* signal);
2788 void execMEMCHECKREQ(Signal* signal);
2789 void execSCAN_FRAGREQ(Signal* signal);
2790 void execSCAN_NEXTREQ(Signal* signal);
2791 void execACC_SCANREF(Signal* signal, TcConnectionrecPtr);
2792 void execNEXT_SCANREF(Signal* signal);
2793 void execACC_CHECK_SCAN(Signal* signal);
2794 void execACC_TO_REF(Signal* signal, TcConnectionrecPtr);
2795 void execCOPY_FRAGREQ(Signal* signal);
2796 void execCOPY_FRAGREF(Signal* signal);
2797 void execCOPY_FRAGCONF(Signal* signal);
2798 void execPREPARE_COPY_FRAG_REQ(Signal* signal);
2799 void execUPDATE_FRAG_DIST_KEY_ORD(Signal*);
2800 void execCOPY_ACTIVEREQ(Signal* signal);
2801 void execLQH_TRANSREQ(Signal* signal);
2802 void execTRANSID_AI(Signal* signal);
2803 void execINCL_NODEREQ(Signal* signal);
2804
2805 void force_lcp(Signal* signal);
2806 void execLCP_FRAG_ORD(Signal* signal);
2807
2808 void execSTART_FRAGREQ(Signal* signal);
2809 void execSTART_RECREF(Signal* signal);
2810
2811 void execGCP_SAVEREQ(Signal* signal);
2812 void execSUB_GCP_COMPLETE_REP(Signal* signal);
2813 void execFSOPENREF(Signal* signal);
2814 void execFSOPENCONF(Signal* signal);
2815 void execFSCLOSECONF(Signal* signal);
2816 void execFSWRITECONF(Signal* signal);
2817 void execFSWRITEREF(Signal* signal);
2818 void execFSREADCONF(Signal* signal);
2819 void execFSREADREF(Signal* signal);
2820 void execFSWRITEREQ(Signal*);
2821 void execTIME_SIGNAL(Signal* signal);
2822 void execFSSYNCCONF(Signal* signal);
2823
2824 void execALTER_TAB_REQ(Signal* signal);
2825 void execALTER_TAB_CONF(Signal* signal);
2826
2827 void execCREATE_TRIG_IMPL_CONF(Signal* signal);
2828 void execCREATE_TRIG_IMPL_REF(Signal* signal);
2829 void execCREATE_TRIG_IMPL_REQ(Signal* signal);
2830
2831 void execDROP_TRIG_IMPL_CONF(Signal* signal);
2832 void execDROP_TRIG_IMPL_REF(Signal* signal);
2833 void execDROP_TRIG_IMPL_REQ(Signal* signal);
2834
2835 void execPREP_DROP_TAB_REQ(Signal* signal);
2836 void execDROP_TAB_REQ(Signal* signal);
2837 void execDROP_TAB_REF(Signal*);
2838 void execDROP_TAB_CONF(Signal*);
2839 void dropTable_nextStep(Signal*, AddFragRecordPtr);
2840
2841 void execTUP_DEALLOCREQ(Signal* signal);
2842 void execLQH_WRITELOG_REQ(Signal* signal);
2843
2844 void execTUXFRAGCONF(Signal* signal);
2845 void execTUXFRAGREF(Signal* signal);
2846 void execTUX_ADD_ATTRCONF(Signal* signal);
2847 void execTUX_ADD_ATTRREF(Signal* signal);
2848
2849 void execBUILD_INDX_IMPL_REF(Signal* signal);
2850 void execBUILD_INDX_IMPL_CONF(Signal* signal);
2851
2852 void execFIRE_TRIG_REQ(Signal*);
2853
2854 void execREAD_LOCAL_SYSFILE_CONF(Signal*);
2855 void execWRITE_LOCAL_SYSFILE_CONF(Signal*);
2856
2857 void execSTART_NODE_LCP_REQ(Signal*);
2858 void execSTART_LOCAL_LCP_ORD(Signal*);
2859 void execSTART_FULL_LOCAL_LCP_ORD(Signal*);
2860 void execUNDO_LOG_LEVEL_REP(Signal*);
2861 void execHALT_COPY_FRAG_REQ(Signal*);
2862 void execHALT_COPY_FRAG_CONF(Signal*);
2863 void execHALT_COPY_FRAG_REF(Signal*);
2864 void execRESUME_COPY_FRAG_REQ(Signal*);
2865 void execRESUME_COPY_FRAG_CONF(Signal*);
2866 void execRESUME_COPY_FRAG_REF(Signal*);
2867 // Statement blocks
2868
2869 void send_halt_copy_frag(Signal*);
2870 void send_resume_copy_frag(Signal*);
2871 void send_halt_copy_frag_conf(Signal*, bool);
2872 void send_resume_copy_frag_conf(Signal*);
2873
2874 void sendLOCAL_RECOVERY_COMPLETE_REP(Signal *signal,
2875 LocalRecoveryCompleteRep::PhaseIds);
2876 void timer_handling(Signal *signal);
2877 void init_acc_ptr_list(ScanRecord*);
2878 bool seize_acc_ptr_list(ScanRecord*, Uint32, Uint32);
2879 void release_acc_ptr_list(ScanRecord*);
2880 Uint32 get_acc_ptr_from_scan_record(ScanRecord*, Uint32, bool);
2881 void set_acc_ptr_in_scan_record(ScanRecord*, Uint32, Uint32);
2882 void i_get_acc_ptr(ScanRecord*, Uint32*&, Uint32);
2883
2884 void removeTable(Uint32 tableId);
2885 void sendLCP_COMPLETE_REP(Signal* signal, Uint32 lcpId);
2886 void sendLCP_FRAGIDREQ(Signal* signal);
2887 void sendLCP_FRAG_REP(Signal * signal, const LcpRecord::FragOrd &,
2888 const Fragrecord*) const;
2889
2890 void updatePackedList(Signal* signal, HostRecord * ahostptr, Uint16 hostId);
2891 void LQHKEY_abort(Signal* signal, int errortype, TcConnectionrecPtr);
2892 void LQHKEY_error(Signal* signal, int errortype);
2893 void nextRecordCopy(Signal* signal, TcConnectionrecPtr);
2894 Uint32 calculateHash(Uint32 tableId, const Uint32* src);
2895 void sendCommittedTc(Signal* signal,
2896 BlockReference atcBlockref,
2897 const TcConnectionrec*);
2898 void sendCompletedTc(Signal* signal,
2899 BlockReference atcBlockref,
2900 const TcConnectionrec*);
2901 void sendLqhkeyconfTc(Signal* signal,
2902 BlockReference atcBlockref,
2903 TcConnectionrecPtr);
2904 void sendBatchedLqhkeyreq(Signal* signal,
2905 Uint32 lqhRef,
2906 Uint32 siglen,
2907 SectionHandle* handle);
2908 void sendCommitLqh(Signal* signal,
2909 BlockReference alqhBlockref,
2910 const TcConnectionrec*);
2911 void sendCompleteLqh(Signal* signal,
2912 BlockReference alqhBlockref,
2913 const TcConnectionrec*);
2914 void sendPackedSignal(Signal* signal,
2915 struct PackedWordsContainer * container);
2916 void cleanUp(Signal* signal, TcConnectionrecPtr);
2917 void sendAttrinfoLoop(Signal* signal);
2918 void sendAttrinfoSignal(Signal* signal);
2919 void sendLqhAttrinfoSignal(Signal* signal);
2920 Uint32 initScanrec(const class ScanFragReq *,
2921 Uint32 aiLen,
2922 TcConnectionrecPtr);
2923 void initScanTc(const class ScanFragReq *,
2924 Uint32 transid1,
2925 Uint32 transid2,
2926 Uint32 fragId,
2927 Uint32 nodeId,
2928 Uint32 hashHi,
2929 TcConnectionrecPtr);
2930 bool finishScanrec(Signal* signal,
2931 ScanRecordPtr &restart,
2932 TcConnectionrecPtr);
2933 void handle_finish_scan(Signal*, TcConnectionrecPtr);
2934 void releaseScanrec(Signal* signal);
2935 void seizeScanrec(Signal* signal);
2936 Uint32 sendKeyinfo20(Signal* signal, ScanRecord *, TcConnectionrec *);
2937 void sendTCKEYREF(Signal*, Uint32 dst, Uint32 route, Uint32 cnt);
2938 void sendScanFragConf(Signal* signal,
2939 Uint32 scanCompleted,
2940 const TcConnectionrec*);
2941
2942 void send_next_NEXT_SCANREQ(Signal* signal,
2943 SimulatedBlock* block,
2944 ExecFunction f,
2945 ScanRecord * const scanPtr,
2946 Uint32 clientPtrI);
2947
2948 void initCopyrec(Signal* signal);
2949 void initCopyTc(Signal* signal, Operation_t, TcConnectionrec*);
2950 void sendCopyActiveConf(Signal* signal,Uint32 tableId);
2951 void checkLcpCompleted(Signal* signal);
2952 void checkLcpHoldop(Signal* signal);
2953 bool checkLcpStarted(Signal* signal);
2954 void checkLcpTupprep(Signal* signal);
2955 void getNextFragForLcp(Signal* signal);
2956 void sendAccContOp(Signal* signal);
2957 void setLogTail(Signal* signal, Uint32 keepGci);
2958 Uint32 remainingLogSize(const LogFileRecordPtr &sltCurrLogFilePtr,
2959 const LogPartRecordPtr &sltLogPartPtr);
2960 bool checkGcpCompleted(Signal* signal, Uint32 pageWritten, Uint32 wordWritten);
2961 void initFsopenconf(Signal* signal);
2962 void initFsrwconf(Signal* signal, bool write);
2963 void initLfo(Signal* signal);
2964 void initLogfile(Signal* signal, Uint32 fileNo);
2965 void initLogpage(Signal* signal);
2966 void openFileRw(Signal* signal, LogFileRecordPtr olfLogFilePtr, bool writeBuffer = true);
2967 void openLogfileInit(Signal* signal);
2968 void openNextLogfile(Signal* signal);
2969 void releaseLfo(Signal* signal);
2970 void releaseLfoPages(Signal* signal);
2971 void releaseLogpage(Signal* signal);
2972 void seizeLfo(Signal* signal);
2973 void seizeLogfile(Signal* signal);
2974 void seizeLogpage(Signal* signal);
2975 void writeFileDescriptor(Signal* signal);
2976 void writeFileHeaderOpen(Signal* signal, Uint32 type);
2977 void writeInitMbyte(Signal* signal);
2978 void writeSinglePage(Signal* signal, Uint32 pageNo,
2979 Uint32 wordWritten, Uint32 place,
2980 bool sync = true);
2981 void buildLinkedLogPageList(Signal* signal);
2982 void changeMbyte(Signal* signal);
2983 Uint32 checkIfExecLog(Signal* signal, TcConnectionrecPtr);
2984 void checkNewMbyte(Signal* signal, const TcConnectionrec*);
2985 void checkReadExecSr(Signal* signal);
2986 void checkScanTcCompleted(Signal* signal, TcConnectionrecPtr);
2987 void closeFile(Signal* signal, LogFileRecordPtr logFilePtr, Uint32 place);
2988 void completedLogPage(Signal* signal,
2989 Uint32 clpType,
2990 Uint32 place,
2991 bool sync_flag = false);
2992
2993 void commit_reorg(TablerecPtr tablePtr);
2994 void wait_reorg_suma_filter_enabled(Signal*);
2995
2996 void deleteFragrec(Uint32 fragId);
2997 void deleteTransidHash(Signal* signal, TcConnectionrecPtr& tcConnectptr);
2998 void findLogfile(Signal* signal,
2999 Uint32 fileNo,
3000 LogPartRecordPtr flfLogPartPtr,
3001 LogFileRecordPtr* parLogFilePtr);
3002 void findPageRef(Signal* signal, CommitLogRecord* commitLogRecord);
3003 int findTransaction(UintR Transid1,
3004 UintR Transid2,
3005 UintR TcOprec,
3006 UintR hi,
3007 TcConnectionrecPtr& tcConnectptr);
3008 void getFirstInLogQueue(Signal* signal, Ptr<TcConnectionrec>&dst);
3009 void remove_from_prepare_log_queue(Signal *signal,
3010 TcConnectionrecPtr tcPtr);
3011 bool getFragmentrec(Signal* signal, Uint32 fragId);
3012 void initialiseAddfragrec(Signal* signal);
3013 void initialiseFragrec(Signal* signal);
3014 void initialiseGcprec(Signal* signal);
3015 void initialiseLcpRec(Signal* signal);
3016 void initialiseLfo(Signal* signal);
3017 void initialiseLogFile(Signal* signal);
3018 void initialiseLogPage(Signal* signal);
3019 void initialiseLogPart(Signal* signal);
3020 void initialisePageRef(Signal* signal);
3021 void initialiseScanrec(Signal* signal);
3022 void initialiseTabrec(Signal* signal);
3023 void initialiseTcrec(Signal* signal);
3024 void initialiseTcNodeFailRec(Signal* signal);
3025 void initFragrec(Signal* signal,
3026 Uint32 tableId,
3027 Uint32 fragId,
3028 Uint32 copyType);
3029 void initFragrecSr(Signal* signal);
3030 void initGciInLogFileRec(Signal* signal, Uint32 noFdDesc);
3031 void initLogpart(Signal* signal);
3032 void initLogPointers(Signal* signal, TcConnectionrecPtr);
3033 void initReqinfoExecSr(Signal* signal, TcConnectionrecPtr);
3034 bool insertFragrec(Signal* signal, Uint32 fragId);
3035 void linkWaitLog(Signal*,
3036 LogPartRecordPtr,
3037 LogPartRecord::OperationQueue &,
3038 TcConnectionrecPtr);
3039 void logNextStart(Signal* signal);
3040 void moveToPageRef(Signal* signal);
3041 void readAttrinfo(Signal* signal, TcConnectionrecPtr);
3042 void readCommitLog(Signal* signal,
3043 CommitLogRecord* commitLogRecord,
3044 TcConnectionrecPtr);
3045 void readExecLog(Signal* signal);
3046 void readExecSrNewMbyte(Signal* signal);
3047 void readExecSr(Signal* signal);
3048 void readKey(Signal* signal, TcConnectionrecPtr);
3049 void readLogData(Signal* signal, Uint32 noOfWords, Uint32& sectionIVal);
3050 void readLogHeader(Signal* signal, TcConnectionrecPtr);
3051 Uint32 readLogword(Signal* signal);
3052 Uint32 readLogwordExec(Signal* signal);
3053 void readSinglePage(Signal* signal, Uint32 pageNo);
3054 void releaseActiveCopy(Signal* signal);
3055 void releaseAddfragrec(Signal* signal);
3056 void releaseFragrec();
3057 void releaseOprec(Signal* signal, TcConnectionrecPtr);
3058 void releasePageRef(Signal* signal);
3059 void releaseMmPages(Signal* signal);
3060 void releasePrPages(Signal* signal);
3061 void releaseTcrec(Signal* signal, TcConnectionrecPtr);
3062 void releaseTcrecLog(Signal* signal, TcConnectionrecPtr);
3063 void removeLogTcrec(Signal* signal, TcConnectionrecPtr);
3064 void removePageRef(Signal* signal);
3065 Uint32 returnExecLog(Signal* signal, TcConnectionrecPtr);
3066 int saveAttrInfoInSection(const Uint32* dataPtr,
3067 Uint32 len,
3068 TcConnectionrec*);
3069 void seizeAddfragrec(Signal* signal);
3070 Uint32 seizeSingleSegment();
3071 Uint32 copyNextRange(Uint32 * dst, TcConnectionrec*);
3072
3073 void seizeFragmentrec(Signal* signal);
3074 void seizePageRef(Signal* signal);
3075 void seizeTcrec(TcConnectionrecPtr& tcConnectptr);
3076 void sendAborted(Signal* signal, TcConnectionrecPtr);
3077 void sendLqhTransconf(Signal* signal,
3078 LqhTransConf::OperationStatus,
3079 TcConnectionrecPtr);
3080 void sendTupkey(Signal* signal, const TcConnectionrec*);
3081 void startExecSr(Signal* signal);
3082 void startNextExecSr(Signal* signal);
3083 void startTimeSupervision(Signal* signal);
3084 void stepAhead(Signal* signal, Uint32 stepAheadWords);
3085 void systemError(Signal* signal, int line);
3086 void writeAbortLog(Signal* signal,
3087 TcConnectionrec*,
3088 LogPartRecord*);
3089 void writeCommitLog(Signal* signal,
3090 LogPartRecordPtr regLogPartPtr,
3091 TcConnectionrec*);
3092 void writeCompletedGciLog(Signal* signal);
3093 void writeDbgInfoPageHeader(LogPageRecordPtr logPagePtr, Uint32 place,
3094 Uint32 pageNo, Uint32 wordWritten);
3095 void writeDirty(Signal* signal, Uint32 place);
3096 void writeKey(Signal* signal, const TcConnectionrec*);
3097 void writeLogHeader(Signal* signal,
3098 const TcConnectionrec*,
3099 LogPartRecord*);
3100 void writeLogWord(Signal* signal, Uint32 data);
3101 void writeLogWords(Signal* signal, const Uint32* data, Uint32 len);
3102 void writeNextLog(Signal* signal);
3103 void errorReport(Signal* signal, int place);
3104 void warningReport(Signal* signal, int place);
3105 void invalidateLogAfterLastGCI(Signal *signal);
3106 Uint32 nextLogFilePtr(Uint32 logFilePtrI);
3107 void readFileInInvalidate(Signal *signal, int stepNext);
3108 void writeFileInInvalidate(Signal *signal, int stepPrev);
3109 bool invalidateCloseFile(Signal*, Ptr<LogPartRecord>, Ptr<LogFileRecord>,
3110 LogFileRecord::LogFileStatus status);
3111 void exitFromInvalidate(Signal* signal);
3112 Uint32 calcPageCheckSum(LogPageRecordPtr logP);
3113 Uint32 handleLongTupKey(Signal* signal,
3114 Uint32* dataPtr,
3115 Uint32 len,
3116 TcConnectionrec*);
3117
3118 void rebuildOrderedIndexes(Signal* signal, Uint32 tableId);
3119
3120 // Generated statement blocks
3121 [[noreturn]] void systemErrorLab(Signal* signal, int line);
3122 void initFourth(Signal* signal);
3123 void packLqhkeyreqLab(Signal* signal, TcConnectionrecPtr);
3124 void sendNdbSttorryLab(Signal* signal);
3125 void execSrCompletedLab(Signal* signal);
3126 void execLogRecord(Signal* signal);
3127 void srPhase3Comp(Signal* signal);
3128 void srLogLimits(Signal* signal);
3129 void srGciLimits(Signal* signal);
3130 void srPhase3Start(Signal* signal);
3131 void checkStartCompletedLab(Signal* signal);
3132 void continueAbortLab(Signal* signal, TcConnectionrecPtr);
3133 void abortContinueAfterBlockedLab(Signal* signal, TcConnectionrec*);
3134 void abortCommonLab(Signal* signal, TcConnectionrecPtr);
3135 void localCommitLab(Signal* signal, TcConnectionrecPtr);
3136 void abortErrorLab(Signal* signal, TcConnectionrecPtr);
3137 void continueAfterReceivingAllAiLab(Signal* signal, TcConnectionrecPtr);
3138 void continueACCKEYCONF(Signal* signal,
3139 Uint32 localKey1,
3140 Uint32 localKey2,
3141 TcConnectionrecPtr);
3142 void abortStateHandlerLab(Signal* signal, TcConnectionrecPtr);
3143 void writeAttrinfoLab(Signal* signal, const TcConnectionrec*);
3144 void scanAttrinfoLab(Signal* signal,
3145 Uint32* dataPtr,
3146 Uint32 length,
3147 TcConnectionrecPtr);
3148 void abort_scan(Signal* signal,
3149 Uint32 scan_ptr_i,
3150 Uint32 errcode,
3151 TcConnectionrecPtr);
3152 void localAbortStateHandlerLab(Signal* signal, TcConnectionrecPtr);
3153 void logLqhkeyreqLab(Signal* signal, TcConnectionrecPtr);
3154 void logLqhkeyreqLab_problems(Signal* signal, TcConnectionrecPtr);
3155 void update_log_problem(Signal*, LogPartRecordPtr, Uint32 problem, bool);
3156 void lqhAttrinfoLab(Signal* signal,
3157 Uint32* dataPtr,
3158 Uint32 length,
3159 TcConnectionrecPtr);
3160 void rwConcludedAiLab(Signal* signal, TcConnectionrecPtr);
3161 void aiStateErrorCheckLab(Signal* signal,
3162 Uint32* dataPtr,
3163 Uint32 length,
3164 TcConnectionrecPtr);
3165 void takeOverErrorLab(Signal* signal, TcConnectionrecPtr);
3166 void endgettupkeyLab(Signal* signal, TcConnectionrecPtr);
3167 bool checkTransporterOverloaded(Signal* signal,
3168 const NodeBitmask& all,
3169 const class LqhKeyReq* req);
3170 void earlyKeyReqAbort(Signal* signal,
3171 const class LqhKeyReq * lqhKeyReq,
3172 bool isLongReq,
3173 Uint32 errorCode,
3174 TcConnectionrecPtr);
3175 void logLqhkeyrefLab(Signal* signal, TcConnectionrecPtr);
3176 void closeCopyLab(Signal* signal, TcConnectionrec*);
3177 void commitReplyLab(Signal* signal, TcConnectionrec*);
3178 void completeUnusualLab(Signal* signal, TcConnectionrecPtr);
3179 void completeTransNotLastLab(Signal* signal, TcConnectionrecPtr);
3180 void completedLab(Signal* signal, TcConnectionrecPtr);
3181 void copyCompletedLab(Signal* signal, TcConnectionrecPtr);
3182 void completeLcpRoundLab(Signal* signal, Uint32 lcpId);
3183 void continueAfterLogAbortWriteLab(Signal* signal, TcConnectionrecPtr);
3184 void sendAttrinfoLab(Signal* signal);
3185 void sendExecConf(Signal* signal);
3186 void execSr(Signal* signal);
3187 void srFourthComp(Signal* signal);
3188 void timeSup(Signal* signal);
3189 void closeCopyRequestLab(Signal* signal, TcConnectionrecPtr);
3190 void closeScanRequestLab(Signal* signal, TcConnectionrecPtr, bool setup);
3191 void scanTcConnectLab(Signal* signal, Uint32 startTcCon, Uint32 fragId);
3192 void initGcpRecLab(Signal* signal);
3193 void prepareContinueAfterBlockedLab(Signal* signal, TcConnectionrecPtr);
3194 void commitContinueAfterBlockedLab(Signal* signal, TcConnectionrecPtr);
3195 void sendExecFragRefLab(Signal* signal);
3196 void fragrefLab(Signal* signal, Uint32 errorCode, const LqhFragReq* req);
3197 void abortAddFragOps(Signal* signal);
3198 void rwConcludedLab(Signal* signal, TcConnectionrecPtr);
3199 void sendsttorryLab(Signal* signal);
3200 void initialiseRecordsLab(Signal* signal, Uint32 data, Uint32, Uint32);
3201 void startphase2Lab(Signal* signal, Uint32 config);
3202 void startphase3Lab(Signal* signal);
3203 void startphase6Lab(Signal* signal);
3204 void moreconnectionsLab(Signal* signal, TcConnectionrecPtr);
3205 void scanReleaseLocksLab(Signal* signal, TcConnectionrec*);
3206 void closeScanLab(Signal* signal, TcConnectionrec*);
3207 void scanNextLoopLab(Signal* signal,
3208 Uint32 clientPtrI,
3209 Uint32 accOpPtr,
3210 ScanRecord * const scanPtr,
3211 Fragrecord * const fragPtr);
3212 void commitReqLab(Signal* signal,
3213 Uint32 gci_hi,
3214 Uint32 gci_lo,
3215 TcConnectionrecPtr);
3216 void completeTransLastLab(Signal* signal, TcConnectionrecPtr);
3217 void restart_queued_scan(Signal*, Uint32);
3218 void tupScanCloseConfLab(Signal* signal, TcConnectionrecPtr);
3219 void tupCopyCloseConfLab(Signal* signal, TcConnectionrecPtr);
3220 void accScanCloseConfLab(Signal* signal, TcConnectionrecPtr);
3221 void accCopyCloseConfLab(Signal* signal, TcConnectionrecPtr);
3222 void nextScanConfScanLab(Signal* signal,
3223 ScanRecord * const scanPtr,
3224 Uint32 fragId,
3225 Uint32 accOpPtr,
3226 TcConnectionrecPtr);
3227 void nextScanConfCopyLab(Signal* signal, TcConnectionrecPtr);
3228 void continueScanNextReqLab(Signal* signal, TcConnectionrec*);
3229 bool keyinfoLab(const Uint32 * src, Uint32 len, TcConnectionrecPtr);
3230 void copySendTupkeyReqLab(Signal* signal);
3231 void storedProcConfScanLab(Signal* signal, TcConnectionrecPtr);
3232 void copyStateFinishedLab(Signal* signal);
3233 void lcpCompletedLab(Signal* signal);
3234 void lcpStartedLab(Signal* signal);
3235 void completed_fragment_checkpoint(Signal *signal,
3236 const LcpRecord::FragOrd & fragOrd);
3237 bool exec_prepare_next_fragment_checkpoint(Signal* signal,
3238 FragrecordPtr fragptr);
3239 void prepare_next_fragment_checkpoint(Signal*, bool);
3240 void perform_fragment_checkpoint(Signal *signal);
3241 void handleFirstFragment(Signal *signal);
3242 void startLcpRoundLab(Signal* signal);
3243 void startFragRefLab(Signal* signal);
3244 void move_start_gci_forward(Signal*, Uint32);
3245 void srCompletedLab(Signal* signal);
3246 void openFileInitLab(Signal* signal);
3247 void openSrFrontpageLab(Signal* signal);
3248 void openSrLastFileLab(Signal* signal);
3249 void openSrNextFileLab(Signal* signal);
3250 void openExecSrStartLab(Signal* signal);
3251 void openExecSrNewMbyteLab(Signal* signal);
3252 void openSrFourthPhaseLab(Signal* signal);
3253 void openSrFourthZeroSkipInitLab(Signal* signal);
3254 void openSrFourthZeroLab(Signal* signal);
3255 void openExecLogLab(Signal* signal);
3256 void checkInitCompletedLab(Signal* signal);
3257 void closingSrLab(Signal* signal);
3258 void closingSrFrontPage(Signal* signal);
3259 void closeExecSrLab(Signal* signal);
3260 void execLogComp(Signal* signal);
3261 void execLogComp_extra_files_closed(Signal* signal);
3262 void closeWriteLogLab(Signal* signal);
3263 void closeExecLogLab(Signal* signal);
3264 void writePageZeroLab(Signal* signal, Uint32 from);
3265 void lastWriteInFileLab(Signal* signal);
3266 void initWriteEndLab(Signal* signal);
3267 void initFirstPageLab(Signal* signal);
3268 void writeGciZeroLab(Signal* signal);
3269 void writeDirtyLab(Signal* signal);
3270 void writeInitMbyteLab(Signal* signal);
3271 void writeLogfileLab(Signal* signal);
3272 void firstPageWriteLab(Signal* signal);
3273 void readSrLastMbyteLab(Signal* signal);
3274 void readSrLastFileLab(Signal* signal);
3275 void readSrNextFileLab(Signal* signal);
3276 void readExecSrLab(Signal* signal);
3277 void readExecLogLab(Signal* signal);
3278 void readSrFourthPhaseLab(Signal* signal);
3279 void readSrFourthZeroLab(Signal* signal);
3280 void copyLqhKeyRefLab(Signal* signal, TcConnectionrecPtr);
3281 void restartOperationsLab(Signal* signal);
3282 void lqhTransNextLab(Signal* signal, TcNodeFailRecordPtr tcNodeFailPtr);
3283 void restartOperationsAfterStopLab(Signal* signal);
3284 void startphase1Lab(Signal* signal, Uint32 config, Uint32 nodeId);
3285 void tupkeyConfLab(Signal* signal,
3286 TcConnectionrecPtr);
3287 void copyTupkeyRefLab(Signal* signal, TcConnectionrecPtr);
3288 void copyTupkeyConfLab(Signal* signal, TcConnectionrecPtr);
3289 void scanTupkeyConfLab(Signal* signal, TcConnectionrec*);
3290 void scanTupkeyRefLab(Signal* signal, TcConnectionrecPtr);
3291 void accScanConfScanLab(Signal* signal, TcConnectionrecPtr);
3292 void accScanConfCopyLab(Signal* signal);
3293 void scanLockReleasedLab(Signal* signal, TcConnectionrec *);
3294 void openSrFourthNextLab(Signal* signal);
3295 void closingInitLab(Signal* signal);
3296 void closeExecSrCompletedLab(Signal* signal);
3297 void readSrFrontpageLab(Signal* signal);
3298
3299 void sendCreateTabReq(Signal*, AddFragRecordPtr);
3300 void sendAddAttrReq(Signal* signal);
3301 void sendAddFragReq(Signal* signal);
3302 void dropTab_wait_usage(Signal*);
3303 Uint32 get_table_state_error(Ptr<Tablerec> tabPtr) const;
3304 void wait_readonly(Signal*);
3305 int check_tabstate(Signal * signal,
3306 const Tablerec * tablePtrP,
3307 Uint32 op,
3308 TcConnectionrecPtr);
3309
3310 void remove_commit_marker(TcConnectionrec * const regTcPtr);
3311 // Initialisation
3312 void initData();
3313 void initRecords(const ndb_mgm_configuration_iterator *mgm_cfg);
3314 protected:
3315 virtual bool getParam(const char* name, Uint32* count);
3316
3317 public:
3318 void checkLcpStopBlockedLab(Signal* signal, Uint32);
3319 void lcp_max_completed_gci(Uint32 & maxCompletedGci,
3320 Uint32 max_gci_written,
3321 Uint32 restorable_gci);
3322 void lcp_complete_scan(Uint32 & newestGci);
3323 Uint32 get_lcp_newest_gci(void);
3324 void get_lcp_frag_stats(Uint64 & row_count,
3325 Uint64 & prev_row_count,
3326 Uint64 & row_change_count,
3327 Uint64 & memory_used_in_bytes,
3328 Uint32 & max_page_cnt);
3329 Uint32 get_current_local_lcp_id(void);
3330 void get_redo_stats(Uint64 &used_in_mbytes,
3331 Uint64 &size_in_mbytes,
3332 Uint64 &written_since_last_in_mbytes,
3333 Uint64 &updates,
3334 Uint64 &inserts,
3335 Uint64 &deletes);
3336
3337 private:
3338 bool validate_filter(Signal*);
3339 bool match_and_print(Signal*, Ptr<TcConnectionrec>);
3340 void ndbinfo_write_op(Ndbinfo::Row&, TcConnectionrecPtr tcPtr);
3341
3342 void define_backup(Signal*);
3343 void execDEFINE_BACKUP_REF(Signal*);
3344 void execDEFINE_BACKUP_CONF(Signal*);
3345 void execBACKUP_FRAGMENT_REF(Signal* signal);
3346 void execBACKUP_FRAGMENT_CONF(Signal* signal);
3347 void execLCP_START_REP(Signal *signal);
3348 void execLCP_PREPARE_REF(Signal* signal);
3349 void execLCP_PREPARE_CONF(Signal* signal);
3350 void execEND_LCPREF(Signal* signal);
3351 void execEND_LCPCONF(Signal* signal);
3352 void execINFORM_BACKUP_DROP_TAB_CONF(Signal *signal);
3353
3354 Uint32 m_backup_ptr;
3355 bool m_node_restart_lcp_second_phase_started;
3356 bool m_node_restart_first_local_lcp_started;
3357 Uint32 m_first_activate_fragment_ptr_i;
3358 Uint32 m_second_activate_fragment_ptr_i;
3359 Uint32 m_curr_lcp_id;
3360 Uint32 m_curr_local_lcp_id;
3361 Uint32 m_next_local_lcp_id;
3362 Uint32 c_saveLcpId;
3363 Uint32 c_restart_localLcpId;
3364 Uint32 c_restart_lcpId;
3365 Uint32 c_restart_maxLcpId;
3366 Uint32 c_restart_maxLocalLcpId;
3367
3368 void execWAIT_COMPLETE_LCP_REQ(Signal*);
3369 void execWAIT_ALL_COMPLETE_LCP_CONF(Signal*);
3370
3371 bool handle_lcp_fragment_first_phase(Signal*);
3372 void activate_redo_log(Signal*, Uint32, Uint32);
3373 void start_lcp_second_phase(Signal*);
3374 void complete_local_lcp(Signal*);
3375
3376 void send_restore_lcp(Signal * signal);
3377 void execRESTORE_LCP_REF(Signal* signal);
3378 void execRESTORE_LCP_CONF(Signal* signal);
3379
3380 /**
3381 * For periodic redo log file initialization status reporting
3382 * and explicit redo log file status reporting
3383 */
3384 /* Init at start of redo log file initialization, timers etc... */
3385 void initReportStatus(Signal* signal);
3386 /* Check timers for reporting at certain points */
3387 void checkReportStatus(Signal* signal);
3388 /* Send redo log file initialization status, invoked either periodically, or explicitly */
3389 void reportStatus(Signal* signal);
3390 /* Redo log file initialization completed report */
3391 void logfileInitCompleteReport(Signal* signal);
3392
3393 void check_send_scan_hb_rep(Signal* signal, ScanRecord*, TcConnectionrec*);
3394
3395 void unlockError(Signal* signal, Uint32 error, TcConnectionrecPtr);
3396 void handleUserUnlockRequest(Signal* signal, TcConnectionrecPtr);
3397
3398 void execLCP_STATUS_CONF(Signal* signal);
3399 void execLCP_STATUS_REF(Signal* signal);
3400
3401 private:
3402
3403 void startLcpFragWatchdog(Signal* signal);
3404 void stopLcpFragWatchdog();
3405 void invokeLcpFragWatchdogThread(Signal* signal);
3406 void checkLcpFragWatchdog(Signal* signal);
3407 const char* lcpStateString(LcpStatusConf::LcpState);
3408
3409 /**
3410 * TUPle deallocation ref counting
3411 */
3412 void incrDeallocRefCount(Signal* signal, Uint32 opPtrI, Uint32 countOpPtrI);
3413 Uint32 decrDeallocRefCount(Signal* signal, Uint32 opPtrI);
3414 void handleDeallocOp(Signal* signal, TcConnectionrecPtr regTcPtr);
3415
3416 Dbtup* c_tup;
3417 Dbtux* c_tux;
3418 Dbacc* c_acc;
3419 Pgman* c_pgman;
3420 Backup* c_backup;
3421 Lgman* c_lgman;
3422 Restore* c_restore;
3423
3424 /**
3425 * Read primary key from tup
3426 */
3427 Uint32 readPrimaryKeys(ScanRecord*, TcConnectionrec*, Uint32 * dst);
3428
3429 /**
3430 * Read primary key from operation
3431 */
3432 public:
3433 Uint32 readPrimaryKeys(Uint32 opPtrI, Uint32 * dst, bool xfrm);
3434 private:
3435
3436 void acckeyconf_tupkeyreq(Signal*, TcConnectionrec*, Fragrecord*,
3437 Uint32, Uint32, Uint32);
3438 void acckeyconf_load_diskpage(Signal*,TcConnectionrecPtr,Fragrecord*,
3439 Uint32, Uint32);
3440
3441 void handle_nr_copy(Signal*, Ptr<TcConnectionrec>);
3442 void exec_acckeyreq(Signal*, Ptr<TcConnectionrec>);
3443 int compare_key(const TcConnectionrec*, const Uint32 * ptr, Uint32 len);
3444 void nr_copy_delete_row(Signal*, Ptr<TcConnectionrec>, Local_key*, Uint32);
3445 Uint32 getKeyInfoWordOrZero(const TcConnectionrec* regTcPtr,
3446 Uint32 offset);
3447 public:
/**
 * Plain data record that is handed by pointer to get_nr_op_info() and
 * nr_delete_complete() (both declared directly after this struct).
 * It has no member functions; all fields are public.
 *
 * NOTE(review): field meanings below are inferred from names only and
 * must be confirmed against the implementations:
 *  - m_ptr_i / m_tup_frag_ptr_i: presumably record indexes ("ptr.i" values)
 *  - m_gci_hi / m_gci_lo: presumably the two 32-bit halves of a GCI
 *  - m_page_id, m_disk_ref, m_row_id: presumably page / row locations
 */
3448 struct Nr_op_info
3449 {
3450 Uint32 m_ptr_i;
3451 Uint32 m_tup_frag_ptr_i;
3452 Uint32 m_gci_hi;
3453 Uint32 m_gci_lo;
3454 Uint32 m_page_id;
3455 Local_key m_disk_ref;
3456 Local_key m_row_id;
3457 };
3458 void get_nr_op_info(Nr_op_info*, Uint32 page_id = RNIL);
3459 void nr_delete_complete(Signal*, Nr_op_info*);
/*
 * Running totals. add_update_size(), add_insert_size() and
 * add_delete_size() each add the average_row_size they are given to the
 * matching counter below.
 * NOTE(review): the unit is presumably bytes - confirm against the callers.
 */
3460 Uint64 m_update_size;
3461 Uint64 m_insert_size;
3462 Uint64 m_delete_size;
add_update_size(Uint64 average_row_size)3463 void add_update_size(Uint64 average_row_size)
3464 {
3465 m_update_size += average_row_size;
3466 }
add_insert_size(Uint64 average_row_size)3467 void add_insert_size(Uint64 average_row_size)
3468 {
3469 m_insert_size += average_row_size;
3470 }
add_delete_size(Uint64 average_row_size)3471 void add_delete_size(Uint64 average_row_size)
3472 {
3473 m_delete_size += average_row_size;
3474 }
3475
3476 public:
3477 void acckeyconf_load_diskpage_callback(Signal*, Uint32, Uint32);
3478
3479 private:
3480 void next_scanconf_load_diskpage(Signal* signal,
3481 ScanRecord * const scanPtr,
3482 Ptr<TcConnectionrec> regTcPtr,
3483 Fragrecord* fragPtrP);
3484
3485 void next_scanconf_tupkeyreq(Signal* signal,
3486 ScanRecord * const scanPtr,
3487 TcConnectionrec * regTcPtr,
3488 Fragrecord* fragPtrP,
3489 Uint32 disk_page);
3490
3491 public:
3492 void next_scanconf_load_diskpage_callback(Signal* signal, Uint32, Uint32);
3493
3494 void tupcommit_conf_callback(Signal* signal, Uint32 tcPtrI);
3495 private:
3496 void tupcommit_conf(Signal* signal,
3497 TcConnectionrecPtr,
3498 Fragrecord *);
3499
3500 void mark_end_of_lcp_restore(Signal* signal);
3501 void log_fragment_copied(Signal* signal);
3502
3503 // ----------------------------------------------------------------
3504 // These are the variables handling the records. Most records have one
3505 // pointer to the array of structs, one pointer-struct, a file size
3506 // and a first free record variable. The pointer structs are temporary
3507 // variables that are kept on the class object since there are often a
3508 // great deal of those variables that exist simultaneously and
3509 // thus no perfect solution of handling them is currently available.
3510 // ----------------------------------------------------------------
3511 /* ------------------------------------------------------------------------- */
3512 /* POSITIONS WITHIN THE ATTRINBUF AND THE MAX SIZE OF DATA WITHIN AN */
3513 /* ATTRINBUF. */
3514 /* ------------------------------------------------------------------------- */
3515
3516
3517 #define ZADDFRAGREC_FILE_SIZE 1
3518 AddFragRecord *addFragRecord;
3519 AddFragRecordPtr addfragptr;
3520 UintR cfirstfreeAddfragrec;
3521 UintR caddfragrecFileSize;
3522 Uint32 c_active_add_frag_ptr_i;
3523
3524 // Configurable
3525 Fragrecord_pool c_fragment_pool;
3526 RSS_AP_SNAPSHOT(c_fragment_pool);
3527
3528 #define ZGCPREC_FILE_SIZE 1
3529 GcpRecord *gcpRecord;
3530 GcpRecordPtr gcpPtr;
3531 UintR cgcprecFileSize;
3532
3533 // MAX_NDB_NODES is the size of this array
3534 HostRecord *hostRecord;
3535 UintR chostFileSize;
3536
3537 #define ZNO_CONCURRENT_LCP 1
3538 LcpRecord *lcpRecord;
3539 LcpRecordPtr lcpPtr;
3540 UintR cfirstfreeLcpLoc;
3541 UintR clcpFileSize;
3542
3543 LogPartRecord *logPartRecord;
3544 LogPartRecordPtr logPartPtr;
3545 UintR clogPartFileSize;
3546 Uint32 clogFileSize; // In MBYTE
3547 /* Max entries for log file:mb meta info in file page zero */
3548 Uint32 cmaxLogFilesInPageZero;
3549 /* Max valid entries for log file:mb meta info in file page zero
3550 * = cmaxLogFilesInPageZero - 1
3551 * as entry zero (for current file) is invalid.
3552 */
3553 Uint32 cmaxValidLogFilesInPageZero;
3554
3555 #if defined VM_TRACE || defined ERROR_INSERT
3556 Uint32 cmaxLogFilesInPageZero_DUMP;
3557 #endif
3558
3559 #if defined ERROR_INSERT
3560 Uint32 delayOpenFilePtrI;
3561 #endif
3562
3563 // Configurable
3564 LogFileRecord *logFileRecord;
3565 LogFileRecordPtr logFilePtr;
3566 UintR cfirstfreeLogFile;
3567 UintR clogFileFileSize;
3568
3569 #define ZLFO_MIN_FILE_SIZE 256
3570 // RedoBuffer/32K minimum ZLFO_MIN_FILE_SIZE
3571 LogFileOperationRecord *logFileOperationRecord;
3572 LogFileOperationRecordPtr lfoPtr;
3573 UintR cfirstfreeLfo;
3574 UintR clfoFileSize;
3575
3576 LogPageRecord *logPageRecord;
3577 LogPageRecordPtr logPagePtr;
3578 UintR cfirstfreeLogPage;
3579 UintR clogPageFileSize;
3580 Uint32 clogPageCount;
3581
3582 #define ZPAGE_REF_FILE_SIZE 20
3583 PageRefRecord *pageRefRecord;
3584 PageRefRecordPtr pageRefPtr;
3585 UintR cfirstfreePageRef;
3586 UintR cpageRefFileSize;
3587
3588
3589 // Configurable
3590 Tablerec *tablerec;
3591 UintR ctabrecFileSize;
3592
3593 // MAX_NDB_NODES is the size of this array
3594 TcNodeFailRecord *tcNodeFailRecord;
3595 UintR ctcNodeFailrecFileSize;
3596
3597 Uint16 terrorCode;
3598
3599 Uint32 c_firstInNodeGroup;
3600
3601 /**
3602 * The below variables are a set of block variables that
3603 * are used heavily in executions of scan and key operations.
3604 * Most of them are set up again after each real-time break.
3605 */
3606 ScanRecordPtr scanptr;
3607 FragrecordPtr fragptr;
3608 FragrecordPtr prim_tab_fragptr;
3609 TablerecPtr tabptr;
3610 Uint32 m_scan_direct_count;
3611 Uint32 m_tot_scan_direct_count;
3612 /**
3613 * Keep track if we should unwind the stack before calling
3614 * send_next_NEXT_SCANREQ.
3615 */
3616 Uint32 m_in_send_next_scan;
3617
3618 // ------------------------------------------------------------------------
3619 // These variables are used to store block state that does not need arrays
3620 // of structs.
3621 // ------------------------------------------------------------------------
3622 Uint32 c_lcpId;
3623 Uint32 cnoOfFragsCheckpointed;
3624 Uint32 c_last_force_lcp_time;
3625 Uint32 c_free_mb_force_lcp_limit; // Force lcp when less than this free mb
3626 Uint32 c_free_mb_tail_problem_limit; // Set TAIL_PROBLEM when less than this..
3627
3628 Uint32 c_max_scan_direct_count;
3629 /* ------------------------------------------------------------------------- */
3630 // cmaxWordsAtNodeRec keeps track of how many words are currently
3631 // outstanding in a node recovery situation.
3632 /* ------------------------------------------------------------------------- */
3633 UintR cmaxWordsAtNodeRec;
3634 /* ------------------------------------------------------------------------- */
3635 /*THIS STATE VARIABLE IS ZTRUE IF AN ADD NODE IS ONGOING. ADD NODE MEANS */
3636 /*THAT CONNECTIONS ARE SET-UP TO THE NEW NODE. */
3637 /* ------------------------------------------------------------------------- */
3638 Uint8 caddNodeState;
3639 /* ------------------------------------------------------------------------- */
3640 /*THIS VARIABLE SPECIFIES WHICH TYPE OF RESTART THAT IS ONGOING */
3641 /* ------------------------------------------------------------------------- */
3642 Uint16 cstartType;
3643 /* ------------------------------------------------------------------------- */
3644 /*THIS VARIABLE INDICATES WHETHER AN INITIAL RESTART IS ONGOING OR NOT. */
3645 /* ------------------------------------------------------------------------- */
3646 Uint8 cinitialStartOngoing;
3647 /* ------------------------------------------------------------------------- */
3648 /*THIS VARIABLE KEEPS TRACK OF WHEN TUP AND ACC HAVE COMPLETED EXECUTING */
3649 /*THEIR UNDO LOG. */
3650 /* ------------------------------------------------------------------------- */
3651 ExecUndoLogState csrExecUndoLogState;
3652 /* ------------------------------------------------------------------------- */
3653 /*THIS VARIABLE KEEPS TRACK OF WHEN TUP AND ACC HAVE CONFIRMED COMPLETION */
3654 /*OF A LOCAL CHECKPOINT ROUND. */
3655 /* ------------------------------------------------------------------------- */
3656 LcpCloseState clcpCompletedState;
3657 /* ------------------------------------------------------------------------- */
3658 /*DURING CONNECTION PROCESSES IN SYSTEM RESTART THESE VARIABLES KEEP TRACK */
3659 /*OF HOW MANY CONNECTIONS AND RELEASES THAT ARE TO BE PERFORMED. */
3660 /* ------------------------------------------------------------------------- */
3661 /***************************************************************************>*/
3662 /*THESE VARIABLES CONTAIN INFORMATION USED DURING SYSTEM RESTART. */
3663 /***************************************************************************>*/
3664 /* ------------------------------------------------------------------------- */
3665 /*THIS VARIABLE IS ZTRUE IF THE SIGNAL START_REC_REQ HAVE BEEN RECEIVED. */
3666 /*RECEPTION OF THIS SIGNAL INDICATES THAT ALL FRAGMENTS THAT THIS NODE */
3667 /*SHOULD START HAVE BEEN RECEIVED. */
3668 /* ------------------------------------------------------------------------- */
/**
 * Anonymous enumeration backing the cstartRecReq member.
 * Each enumerator has an explicitly pinned value (0..3).
 */
enum {
  SRR_INITIAL = 0,
  SRR_START_REC_REQ_ARRIVED = 1,
  SRR_REDO_COMPLETE = 2,
  SRR_FIRST_LCP_DONE = 3
} cstartRecReq;
3675 Uint32 cstartRecReqData;
3676
3677 /* ------------------------------------------------------------------------- */
3678 /*THIS VARIABLE KEEPS TRACK OF HOW MANY FRAGMENTS THAT PARTICIPATE IN */
3679 /*EXECUTING THE LOG. IF ZERO WE DON'T NEED TO EXECUTE THE LOG AT ALL. */
3680 /* ------------------------------------------------------------------------- */
3681 Uint32 cnoFragmentsExecSr;
3682
3683 /**
 * This is the number of GSN_EXEC_FRAGREQ signals sent during this log phase
3685 */
3686 Uint32 cnoOutstandingExecFragReq;
3687
3688 /* ------------------------------------------------------------------------- */
3689 /*THIS VARIABLE KEEPS TRACK OF WHICH OF THE FIRST TWO RESTART PHASES THAT */
3690 /*HAVE COMPLETED. */
3691 /* ------------------------------------------------------------------------- */
3692 Uint8 csrPhaseStarted;
3693 /* ------------------------------------------------------------------------- */
3694 /*NUMBER OF PHASES COMPLETED OF EXECUTING THE FRAGMENT LOG. */
3695 /* ------------------------------------------------------------------------- */
3696 Uint8 csrPhasesCompleted;
3697 /* ------------------------------------------------------------------------- */
3698 /*THE BLOCK REFERENCE OF THE MASTER DIH DURING SYSTEM RESTART. */
3699 /* ------------------------------------------------------------------------- */
3700 BlockReference cmasterDihBlockref;
3701 /* ------------------------------------------------------------------------- */
3702 /*THIS VARIABLE IS THE HEAD OF A LINKED LIST OF FRAGMENTS WAITING TO BE */
3703 /*RESTORED FROM DISK. */
3704 /* ------------------------------------------------------------------------- */
3705 Fragrecord_fifo c_lcp_waiting_fragments; // StartFragReq'ed
3706 Fragrecord_fifo c_lcp_restoring_fragments; // Restoring as we speek
3707 Fragrecord_fifo c_lcp_complete_fragments; // Restored
3708 Fragrecord_fifo c_queued_lcp_frag_ord; //Queue for LCP_FRAG_ORDs
3709
3710 /* ------------------------------------------------------------------------- */
3711 /*USED DURING SYSTEM RESTART, INDICATES THE OLDEST GCI THAT CAN BE RESTARTED */
3712 /*FROM AFTER THIS SYSTEM RESTART. USED TO FIND THE LOG TAIL. */
3713 /* ------------------------------------------------------------------------- */
3714 UintR crestartOldestGci;
3715 /* ------------------------------------------------------------------------- */
3716 /*USED DURING SYSTEM RESTART, INDICATES THE NEWEST GCI THAT CAN BE RESTARTED */
3717 /*AFTER THIS SYSTEM RESTART. USED TO FIND THE LOG HEAD. */
3718 /* ------------------------------------------------------------------------- */
3719 UintR crestartNewestGci;
3720
3721 bool c_is_first_gcp_save_started;
3722 /* ------------------------------------------------------------------------- */
3723 /*THE NUMBER OF LOG FILES. SET AS A PARAMETER WHEN NDB IS STARTED. */
3724 /* ------------------------------------------------------------------------- */
3725 UintR cnoLogFiles;
3726 /* ------------------------------------------------------------------------- */
3727 /*THESE TWO VARIABLES CONTAIN THE NEWEST GCI RECEIVED IN THE BLOCK AND THE */
3728 /*NEWEST COMPLETED GCI IN THE BLOCK. */
3729 /* ------------------------------------------------------------------------- */
3730 UintR cnewestGci;
3731 UintR cnewestCompletedGci;
3732 /* ------------------------------------------------------------------------- */
3733 /*THIS VARIABLE ONLY PASSES INFORMATION FROM STTOR TO STTORRY = TEMPORARY */
3734 /* ------------------------------------------------------------------------- */
3735 Uint16 csignalKey;
3736 /* ------------------------------------------------------------------------- */
3737 /*THIS VARIABLE CONTAINS THE CURRENT START PHASE IN THE BLOCK. IS ZNIL IF */
3738 /*NO SYSTEM RESTART IS ONGOING. */
3739 /* ------------------------------------------------------------------------- */
3740 Uint16 cstartPhase;
3741 /* ------------------------------------------------------------------------- */
/*THIS VARIABLE CONTAINS THE CURRENT GLOBAL CHECKPOINT RECORD. IT'S RNIL IF  */
/*NO GCP SAVE IS ONGOING.                                                    */
3744 /* ------------------------------------------------------------------------- */
3745 UintR ccurrentGcprec;
3746 /* ------------------------------------------------------------------------- */
3747 /*THESE VARIABLES ARE USED TO KEEP TRACK OF ALL ACTIVE COPY FRAGMENTS IN LQH.*/
3748 /* ------------------------------------------------------------------------- */
3749 Uint8 cnoActiveCopy;
3750 UintR cactiveCopy[4];
3751 /* ------------------------------------------------------------------------- */
/* These variables are used to keep track of what time we have reported so   */
/* far in the TIME_SIGNAL handling.                                          */
3754 /* ------------------------------------------------------------------------- */
3755 NDB_TICKS c_latestTIME_SIGNAL;
3756 Uint64 c_elapsed_time_millis;
3757
3758 /* ------------------------------------------------------------------------- */
3759 /*THESE VARIABLES CONTAIN THE BLOCK REFERENCES OF THE OTHER NDB BLOCKS. */
3760 /*ALSO THE BLOCK REFERENCE OF MY OWN BLOCK = LQH */
3761 /* ------------------------------------------------------------------------- */
3762 BlockReference caccBlockref;
3763 BlockReference ctupBlockref;
3764 BlockReference ctuxBlockref;
3765 BlockReference cownref;
3766 Uint32 cTransactionDeadlockDetectionTimeout;
3767 UintR cLqhTimeOutCount;
3768 UintR cLqhTimeOutCheckCount;
3769 UintR cnoOfLogPages;
3770 /* ------------------------------------------------------------------------- */
3771 /*THIS VARIABLE CONTAINS MY OWN PROCESSOR ID. */
3772 /* ------------------------------------------------------------------------- */
3773 NodeId cownNodeid;
3774
3775 /* ------------------------------------------------------------------------- */
3776 /*THESE VARIABLES CONTAIN INFORMATION ABOUT THE OTHER NODES IN THE SYSTEM */
3777 /*THESE VARIABLES ARE MOSTLY USED AT SYSTEM RESTART AND ADD NODE TO SET-UP */
3778 /*AND RELEASE CONNECTIONS TO OTHER NODES IN THE CLUSTER. */
3779 /* ------------------------------------------------------------------------- */
3780 /* ------------------------------------------------------------------------- */
3781 /*THIS ARRAY CONTAINS THE PROCESSOR ID'S OF THE NODES THAT ARE ALIVE. */
3782 /*CNO_OF_NODES SPECIFIES HOW MANY NODES THAT ARE CURRENTLY ALIVE. */
3783 /*CNODE_VERSION SPECIFIES THE NDB VERSION EXECUTING ON THE NODE. */
3784 /* ------------------------------------------------------------------------- */
3785 UintR cpackedListIndex;
3786 Uint16 cpackedList[MAX_NDB_NODES];
3787 UintR cnodeData[MAX_NDB_NODES];
3788 UintR cnodeStatus[MAX_NDB_NODES];
3789 UintR cnoOfNodes;
3790
3791 NdbNodeBitmask m_sr_nodes;
3792 NdbNodeBitmask m_sr_exec_sr_req;
3793 NdbNodeBitmask m_sr_exec_sr_conf;
3794
3795 /* ------------------------------------------------------------------------- */
3796 /* THIS VARIABLE CONTAINS THE DIRECTORY OF A HASH TABLE OF ALL ACTIVE */
3797 /* OPERATION IN THE BLOCK. IT IS USED TO BE ABLE TO QUICKLY ABORT AN */
3798 /* OPERATION WHERE THE CONNECTION WAS LOST DUE TO NODE FAILURES. IT IS */
3799 /* ACTUALLY USED FOR ALL ABORTS COMMANDED BY TC. */
3800 /* ------------------------------------------------------------------------- */
3801 UintR preComputedRequestInfoMask;
3802 #define TRANSID_HASH_SIZE 4096
3803 UintR ctransidHash[TRANSID_HASH_SIZE];
3804
3805 Uint32 c_diskless;
3806 Uint32 c_o_direct;
3807 Uint32 c_o_direct_sync_flag;
3808 Uint32 m_use_om_init;
3809 Uint32 c_error_insert_table_id;
3810
3811 #ifndef NO_REDO_PAGE_CACHE
3812 /***********************************************************
3813 * MODULE: Redo Page Cache
3814 *
 * When running redo, the current code scans the log until it finds a commit
 * record (for an operation). The commit record contains a back-pointer
 * to a prepare-record.
 *
 * If the prepare record is inside the 512k window that is being read
 * from the redo-log, the access is quick.
 *
 * But if it's not, then the following sequence is performed:
 * [file-open]?[page-read][execute-log-record][file-close]?[release-page]
 *
 * For big (or long running) transactions this becomes very inefficient.
 *
 * The RedoPageCache changes this so that the pages are not released as in
 * the sequence above, but are rather put into an LRU (using RedoBuffer).
3829 */
3830
3831 /**
3832 * This is a "dummy" struct that is used when
3833 * putting LogPageRecord-entries into lists/hashes
3834 */
3835 struct RedoCacheLogPageRecord
3836 {
RedoCacheLogPageRecordDblqh::RedoCacheLogPageRecord3837 RedoCacheLogPageRecord() {}
3838 /**
3839 * NOTE: These numbers must match page-header definition
3840 */
3841 Uint32 header0[15];
3842 Uint32 m_page_no;
3843 Uint32 m_file_no;
3844 Uint32 header1[5];
3845 Uint32 m_part_no;
3846 Uint32 nextList;
3847 Uint32 nextHash;
3848 Uint32 prevList;
3849 Uint32 prevHash;
3850 Uint32 rest[8192-27];
3851
equalDblqh::RedoCacheLogPageRecord3852 inline bool equal(const RedoCacheLogPageRecord & p) const {
3853 return
3854 (p.m_part_no == m_part_no) &&
3855 (p.m_page_no == m_page_no) &&
3856 (p.m_file_no == m_file_no);
3857 }
3858
hashValueDblqh::RedoCacheLogPageRecord3859 inline Uint32 hashValue() const {
3860 return (m_part_no << 24) + (m_file_no << 16) + m_page_no;
3861 }
3862 };
3863 typedef ArrayPool<RedoCacheLogPageRecord> RedoCacheLogPageRecord_pool;
3864 typedef DLHashTable<RedoCacheLogPageRecord_pool> RedoCacheLogPageRecord_hash;
3865 typedef DLCFifoList<RedoCacheLogPageRecord_pool> RedoCacheLogPageRecord_fifo;
3866
3867 struct RedoPageCache
3868 {
RedoPageCacheDblqh::RedoPageCache3869 RedoPageCache() : m_hash(m_pool), m_lru(m_pool),
3870 m_hits(0),m_multi_page(0), m_multi_miss(0) {}
3871 RedoCacheLogPageRecord_hash m_hash;
3872 RedoCacheLogPageRecord_fifo m_lru;
3873 RedoCacheLogPageRecord_pool m_pool;
3874 Uint32 m_hits;
3875 Uint32 m_multi_page;
3876 Uint32 m_multi_miss;
3877 } m_redo_page_cache;
3878
3879 void evict(RedoPageCache&, Uint32 cnt);
3880 void do_evict(RedoPageCache&, Ptr<RedoCacheLogPageRecord>);
3881 void addCachePages(RedoPageCache&,
3882 Uint32 partNo,
3883 Uint32 startPageNo,
3884 LogFileOperationRecord*);
3885 void release(RedoPageCache&);
3886 #endif
3887
3888 #ifndef NO_REDO_OPEN_FILE_CACHE
3889 struct RedoOpenFileCache
3890 {
RedoOpenFileCacheDblqh::RedoOpenFileCache3891 RedoOpenFileCache() : m_lru(m_pool), m_hits(0), m_close_cnt(0) {}
3892
3893 LogFileRecord_fifo m_lru;
3894 LogFileRecord_pool m_pool;
3895 Uint32 m_hits;
3896 Uint32 m_close_cnt;
3897 } m_redo_open_file_cache;
3898
3899 void openFileRw_cache(Signal* signal, LogFileRecordPtr olfLogFilePtr);
3900 void closeFile_cache(Signal* signal, LogFileRecordPtr logFilePtr, Uint32);
3901 void release(Signal*, RedoOpenFileCache&);
3902 #endif
3903
3904 public:
3905 void execINFO_GCP_STOP_TIMER(Signal*);
3906 Uint32 c_gcp_stop_timer;
3907
3908 bool is_same_trans(Uint32 opId, Uint32 trid1, Uint32 trid2);
3909 void get_op_info(Uint32 opId, Uint32 *hash, Uint32* gci_hi, Uint32* gci_lo,
3910 Uint32* transId1, Uint32* transId2);
3911 void accminupdate(Signal*, Uint32 opPtrI, const Local_key*);
3912 void accremoverow(Signal*, Uint32 opPtrI, const Local_key*);
3913
3914 /**
3915 *
3916 */
3917 struct CommitAckMarker {
3918 STATIC_CONST( TYPE_ID = RT_DBLQH_COMMIT_ACK_MARKER );
3919 Uint32 m_magic;
3920
CommitAckMarkerDblqh::CommitAckMarker3921 CommitAckMarker() :
3922 m_magic(Magic::make(TYPE_ID))
3923 {}
3924
~CommitAckMarkerDblqh::CommitAckMarker3925 ~CommitAckMarker()
3926 {}
3927
3928 Uint32 transid1;
3929 Uint32 transid2;
3930
3931 Uint32 apiRef; // Api block ref
3932 Uint32 apiOprec; // Connection Object in NDB API
3933 BlockReference tcRef;
3934 union { Uint32 nextPool; Uint32 nextHash; };
3935 Uint32 prevHash;
3936 Uint32 reference_count;
3937 bool in_hash;
3938 bool removed_by_fail_api;
3939
equalDblqh::CommitAckMarker3940 inline bool equal(const CommitAckMarker & p) const {
3941 return ((p.transid1 == transid1) && (p.transid2 == transid2));
3942 }
3943
hashValueDblqh::CommitAckMarker3944 inline Uint32 hashValue() const {
3945 return transid1;
3946 }
3947 };
3948 STATIC_CONST(DBLQH_COMMIT_ACK_MARKER_TRANSIENT_POOL_INDEX = 2);
3949 typedef Ptr<CommitAckMarker> CommitAckMarkerPtr;
3950 typedef TransientPool<CommitAckMarker> CommitAckMarker_pool;
3951 typedef DLHashTable<CommitAckMarker_pool> CommitAckMarker_hash;
3952
3953 CommitAckMarker_pool m_commitAckMarkerPool;
3954 CommitAckMarker_hash m_commitAckMarkerHash;
3955 typedef CommitAckMarker_hash::Iterator CommitAckMarkerIterator;
3956 void execREMOVE_MARKER_ORD(Signal* signal);
3957 void scanMarkers(Signal* signal, Uint32 tcNodeFail, Uint32 i);
3958 bool check_tc_and_update_max_instance(BlockReference ref,
3959 TcNodeFailRecord *tcNodeFailPtr);
3960
3961 void ndbdFailBlockCleanupCallback(Signal* signal, Uint32 failedNodeID, Uint32 ignoredRc);
3962
3963 struct MonotonicCounters {
MonotonicCountersDblqh::MonotonicCounters3964 MonotonicCounters() :
3965 operations(0) {}
3966
3967 Uint64 operations;
3968
build_event_repDblqh::MonotonicCounters3969 Uint32 build_event_rep(Signal* signal) const
3970 {
3971 /*
3972 Read saved value from CONTINUEB, subtract from
3973 counter and write to EVENT_REP
3974 */
3975 struct { const Uint64* ptr; Uint64 old; } vars[] = {
3976 { &operations, 0 }
3977 };
3978 const size_t num = sizeof(vars)/sizeof(vars[0]);
3979
3980 signal->theData[0] = NDB_LE_OperationReportCounters;
3981
3982 // Read old values from signal
3983 for (size_t i = 0; i < num ; i++)
3984 {
3985 vars[i].old =
3986 (signal->theData[1+(2*i)+1] |(Uint64(signal->theData[1+(2*i)])<< 32));
3987 }
3988
3989 // Write difference back to signal
3990 for (size_t i = 0; i < num ; i++)
3991 {
3992 signal->theData[1 + i] = (Uint32)(*vars[i].ptr - vars[i].old);
3993 }
3994 return 1 + num;
3995 }
3996
build_continueBDblqh::MonotonicCounters3997 Uint32 build_continueB(Signal* signal) const
3998 {
3999 /* Save current value of counters to CONTINUEB */
4000 const Uint64* vars[] = { &operations };
4001 const size_t num = sizeof(vars)/sizeof(vars[0]);
4002
4003 for (size_t i = 0; i < num ; i++)
4004 {
4005 signal->theData[1+i*2] = Uint32(*vars[i] >> 32);
4006 signal->theData[1+i*2+1] = Uint32(*vars[i]);
4007 }
4008 return 1 + num * 2;
4009 }
4010
4011 } c_Counters;
4012
4013 Uint32 c_max_redo_lag;
4014 Uint32 c_max_redo_lag_counter;
4015 Uint64 cTotalLqhKeyReqCount;
4016 Uint32 c_max_parallel_scans_per_frag;
4017
4018 Uint64 c_keyOverloads;
4019
4020 /* All that apply */
4021 Uint64 c_keyOverloadsTcNode;
4022 Uint64 c_keyOverloadsReaderApi;
4023 Uint64 c_keyOverloadsPeerNode;
4024 Uint64 c_keyOverloadsSubscriber;
4025
4026 Uint64 c_scanSlowDowns;
4027
4028 /**
4029 Startup logging:
4030
4031 c_fragmentsStarted:
4032 Total number of fragments started as part of node restart
4033 c_fragmentsStartedWithCopy:
4034 Number of fragments started by complete copy where no useful LCP was
4035 accessible for the fragment.
4036 c_fragCopyFrag:
4037 The current fragment id copied
4038 c_fragCopyTable:
4039 The current table id copied
4040 c_fragCopyRowsIns:
4041 The number of rows inserted in current fragment
4042 c_fragCopyRowsDel:
4043 The number of rows deleted in current fragment
4044 c_fragBytesCopied:
4045 The number of bytes sent over the wire to copy the current fragment
4046
4047 c_fragmentCopyStart:
4048 Time of start of copy fragment
4049 c_fragmentsCopied:
4050 Number of fragments copied
4051 c_totalCopyRowsIns:
4052 Total number of rows inserted as part of copy process
4053 c_totalCopyRowsDel:
4054 Total number of rows deleted as part of copy process
4055 c_totalBytesCopied:
4056 Total number of bytes sent over the wire as part of the copy process
4057 */
4058 Uint32 c_fragmentsStarted;
4059 Uint32 c_fragmentsStartedWithCopy; /* Non trans -> 2PINR */
4060
4061 Uint32 c_fragCopyFrag;
4062 Uint32 c_fragCopyTable;
4063 Uint64 c_fragCopyRowsIns;
4064 Uint64 c_fragCopyRowsDel;
4065 Uint64 c_fragBytesCopied;
4066
4067 Uint64 c_fragmentCopyStart;
4068 Uint32 c_fragmentsCopied;
4069 Uint64 c_totalCopyRowsIns;
4070 Uint64 c_totalCopyRowsDel;
4071 Uint64 c_totalBytesCopied;
4072
4073 bool is_first_instance();
4074 bool is_copy_frag_in_progress();
4075 bool is_scan_ok(ScanRecord*, Fragrecord::FragStatus);
4076 void set_min_keep_gci(Uint32 max_completed_gci);
4077
4078 void sendRESTORABLE_GCI_REP(Signal*, Uint32 gci);
4079 void start_synch_gcp(Signal*);
4080 void start_local_lcp(Signal*, Uint32 lcpId, Uint32 localLcpId);
4081
4082 void execLCP_ALL_COMPLETE_CONF(Signal*);
4083 void execSET_LOCAL_LCP_ID_CONF(Signal*);
4084 void execCOPY_FRAG_NOT_IN_PROGRESS_REP(Signal*);
4085 void execCUT_REDO_LOG_TAIL_REQ(Signal*);
4086
4087 /**
4088 * Variable keeping track of which GCI to keep in REDO log
4089 * after completing a LCP.
4090 */
4091 Uint32 c_max_keep_gci_in_lcp;
4092 Uint32 c_keep_gci_for_lcp;
4093 bool c_first_set_min_keep_gci;
4094
4095 /**
4096 * Some code and variables to serialize access to NDBCNTR for
4097 * writes of the local sysfile.
4098 */
4099 bool c_start_phase_9_waiting;
4100 bool c_outstanding_write_local_sysfile;
4101 bool c_send_gcp_saveref_needed;
4102
4103 /**
4104 * Variable that keeps track of maximum GCI that was recorded in the
4105 * LCP. When this GCI is safe on disk the entire LCP is safe on disk.
4106 */
4107 Uint32 c_max_gci_in_lcp;
4108
4109 /* Have we sent WAIT_COMPLETE_LCP_CONF yet */
4110 bool c_local_lcp_sent_wait_complete_conf;
4111
4112 /* Have we sent WAIT_ALL_COMPLETE_LCP_REQ yet */
4113 bool c_local_lcp_sent_wait_all_complete_lcp_req;
4114
4115 /**
4116 * Current ongoing local LCP id, == 0 means distributed LCP */
4117 Uint32 c_localLcpId;
4118
4119 /* Counter for starting local LCP ordered by UNDO log overload */
4120 Uint32 c_current_local_lcp_table_id;
4121
4122 /**
4123 * Set flag that indicates that first distributed LCP is started.
4124 * This means that we should distribute the signal
4125 * RESTORABLE_GCI_REP to the backup block even if first LCP isn't
4126 * done yet.
4127 */
4128 bool m_first_distributed_lcp_started;
4129 /**
4130 * 0/1 toggled for each local LCP executed to keep track of which
4131 * fragments have been started as part of this local LCP and which
4132 * haven't.
4133 */
4134 Uint8 c_current_local_lcp_instance;
4135
4136 /* Variable set when local LCP starts and when it stops it is reset */
4137 bool c_local_lcp_started;
4138
4139 /**
4140 * Variable set when local LCP is started due to UNDO log overload.
4141 */
4142 bool c_full_local_lcp_started;
4143
4144 /* Is Copy Fragment process currently ongoing */
4145 bool c_copy_fragment_in_progress;
4146
4147 void start_lcp_on_table(Signal*);
4148 void send_lastLCP_FRAG_ORD(Signal*);
4149
4150 /**
4151 * Variables tracking state of Halt/Resume Copy Fragment process on
4152 * Client side (starting node). Also methods.
4153 * ------------------------------------------
4154 */
4155
/* Indicator that the copy fragment process has been halted */
4157 bool c_copy_frag_halted;
4158
4159 /* Halt process is locked while waiting for response from live node */
4160 bool c_copy_frag_halt_process_locked;
4161
4162 /* Is UNDO log currently overloaded */
4163 bool c_undo_log_overloaded;
4164
/**
 * States of the halt/resume copy fragment process.
 * Every enumerator carries an explicitly pinned value (0..5).
 */
enum COPY_FRAG_HALT_STATE_TYPE
{
  COPY_FRAG_HALT_STATE_IDLE           = 0,
  COPY_FRAG_HALT_WAIT_FIRST_LQHKEYREQ = 1,
  PREPARE_COPY_FRAG_IS_HALTED         = 2,
  WAIT_RESUME_COPY_FRAG_CONF          = 3,
  WAIT_HALT_COPY_FRAG_CONF            = 4,
  COPY_FRAG_IS_HALTED                 = 5
};
4174 /* State of halt copy fragment process */
4175 COPY_FRAG_HALT_STATE_TYPE c_copy_frag_halt_state;
4176
4177 /* Save of PREPARE_COPY_FRAGREQ signal */
4178 PrepareCopyFragReq c_prepare_copy_fragreq_save;
4179
4180 void send_prepare_copy_frag_conf(Signal*,
4181 PrepareCopyFragReq&,
4182 Uint32,
4183 Uint32);
4184 /**
4185 * Variables tracking state of Halt/Resume Copy Fragment process on
4186 * Server side (live node).
4187 */
4188 Uint32 c_tc_connect_rec_copy_frag;
4189 bool c_copy_frag_live_node_halted;
4190 bool c_copy_frag_live_node_performing_halt;
4191 HaltCopyFragReq c_halt_copy_fragreq_save;
4192
getAllowRead() const4193 inline bool getAllowRead() const {
4194 return getNodeState().startLevel < NodeState::SL_STOPPING_3;
4195 }
4196
4197 inline bool TRACE_OP_CHECK(const TcConnectionrec* regTcPtr);
4198 #ifdef ERROR_INSERT
4199 void TRACE_OP_DUMP(const TcConnectionrec* regTcPtr, const char * pos);
4200 #endif
4201
4202 #ifdef ERROR_INSERT
4203 Uint32 c_master_node_id;
4204 #endif
4205
4206 Uint32 get_node_status(Uint32 nodeId) const;
4207 bool check_ndb_versions() const;
4208
4209 void suspendFile(Signal* signal, Uint32 filePtrI, Uint32 millis);
4210 void suspendFile(Signal* signal, Ptr<LogFileRecord> logFile, Uint32 millis);
4211
4212 void send_runredo_event(Signal*, LogPartRecord *, Uint32 currgci);
4213
4214 void sendFireTrigConfTc(Signal* signal, BlockReference ref, Uint32 Tdata[]);
4215 bool check_fire_trig_pass(Uint32 op, Uint32 pass);
4216
4217 bool handleLCPSurfacing(Signal *signal);
4218 bool is_disk_columns_in_table(Uint32 tableId);
4219 void sendSTART_FRAGCONF(Signal*);
4220 void handle_check_system_scans(Signal*);
4221 #define ZLCP_CHECK_INDEX 0
4222 #define ZBACKUP_CHECK_INDEX 1
4223 #define ZCOPY_FRAGREQ_CHECK_INDEX 2
4224 Uint32 c_check_scanptr_i[3];
4225 Uint32 c_check_scanptr_save_line[3];
4226 Uint32 c_check_scanptr_save_timer[3];
4227
4228 AlterTabReq c_keep_alter_tab_req;
4229 Uint32 c_keep_alter_tab_req_len;
4230 Uint32 c_executing_redo_log;
4231 Uint32 c_num_fragments_created_since_restart;
4232 Uint32 c_fragments_in_lcp;
4233 bool c_wait_lcp_surfacing;
4234 Uint32 get_committed_mbytes(LogPartRecord*);
4235 void increment_committed_mbytes(LogPartRecord*, TcConnectionrec*);
4236 void decrement_committed_mbytes(LogPartRecord*, TcConnectionrec*);
4237 bool is_restore_phase_done();
4238 bool is_full_local_lcp_running();
4239 bool is_lcp_idle(LcpRecord *lcpPtrP);
4240 Uint32 m_restart_local_latest_lcp_id;
4241 private:
4242 bool getNextTcConRec(Uint32 &next,
4243 TcConnectionrecPtr &tcConnectptr,
4244 Uint32 max_loops);
4245 bool getNextScanRec(Uint32 &next,
4246 ScanRecordPtr &loc_scanptr,
4247 Uint32 max_loops);
4248 bool getNextCommitAckMarker(Uint32 &next,
4249 CommitAckMarkerPtr &commitAckMarkerPtr,
4250 Uint32 max_loops);
4251 void checkPoolShrinkNeed(Uint32 pool_index,
4252 const TransientFastSlotPool& pool);
4253 void sendPoolShrink(Uint32 pool_index);
4254 void shrinkTransientPools(Uint32 pool_index);
4255
4256 static const Uint32 c_transient_pool_count = 3;
4257 TransientFastSlotPool* c_transient_pools[c_transient_pool_count];
4258 Bitmask<1> c_transient_pools_shrinking;
4259
4260 bool seize_op_rec(TcConnectionrecPtr &tcConnectptr);
4261 void release_op_rec(TcConnectionrecPtr tcConnectptr);
4262 void send_scan_fragref(Signal*, Uint32, Uint32, Uint32, Uint32, Uint32);
4263 void init_release_scanrec(ScanRecord*);
4264 void check_pgman_prep_lcp_active_prep_drop_tab(Signal*, Uint32);
4265 void check_pgman_prep_lcp_active_drop_tab(Signal*, Uint32);
4266 public:
4267 static Uint64 getTransactionMemoryNeed(
4268 const Uint32 ldm_instance_count,
4269 const ndb_mgm_configuration_iterator * mgm_cfg,
4270 const bool use_reserved);
4271 #endif
4272 };
4273
4274 #ifndef DBLQH_STATE_EXTRACT
checkPoolShrinkNeed(const Uint32 pool_index,const TransientFastSlotPool & pool)4275 inline void Dblqh::checkPoolShrinkNeed(const Uint32 pool_index,
4276 const TransientFastSlotPool& pool)
4277 {
4278 #if defined(VM_TRACE) || defined(ERROR_INSERT)
4279 ndbrequire(pool_index < c_transient_pool_count);
4280 ndbrequire(c_transient_pools[pool_index] == &pool);
4281 #endif
4282 if (pool.may_shrink())
4283 {
4284 sendPoolShrink(pool_index);
4285 }
4286 }
4287
4288 inline
4289 bool
is_full_local_lcp_running()4290 Dblqh::is_full_local_lcp_running()
4291 {
4292 return c_full_local_lcp_started;
4293 }
4294
4295 inline
4296 bool
is_restore_phase_done()4297 Dblqh::is_restore_phase_done()
4298 {
4299 return (csrExecUndoLogState != EULS_IDLE);
4300 }
4301
4302 inline
4303 bool
check_scan_batch_completed() const4304 Dblqh::ScanRecord::check_scan_batch_completed() const
4305 {
4306 Uint32 max_rows = m_max_batch_size_rows;
4307 Uint32 max_bytes = m_max_batch_size_bytes;
4308
4309 return m_stop_batch ||
4310 (max_rows > 0 && (m_curr_batch_size_rows >= max_rows)) ||
4311 (max_bytes > 0 && (m_curr_batch_size_bytes >= max_bytes));
4312 }
4313
4314 inline
4315 void
i_get_acc_ptr(ScanRecord * scanP,Uint32 * & acc_ptr,Uint32 index)4316 Dblqh::i_get_acc_ptr(ScanRecord* scanP, Uint32* &acc_ptr, Uint32 index)
4317 {
4318 /* Return ptr to place where acc ptr for operation with given
4319 * index is stored.
4320 * If index == 0, it's stored in the ScanRecord, otherwise it's
4321 * stored in a segment linked from the ScanRecord.
4322 */
4323 if (index == 0) {
4324 acc_ptr= (Uint32*)&scanP->scan_acc_op_ptr[0];
4325 } else {
4326
4327 Uint32 segmentIVal, segment, segmentOffset;
4328 SegmentedSectionPtr segPtr;
4329
4330 segment= (index + SectionSegment::DataLength -1) /
4331 SectionSegment::DataLength;
4332 segmentOffset= (index - 1) % SectionSegment::DataLength;
4333 jamDebug();
4334 ndbassert( segment < ScanRecord::MaxScanAccSegments );
4335
4336 segmentIVal= scanP->scan_acc_op_ptr[ segment ];
4337 getSection(segPtr, segmentIVal);
4338
4339 acc_ptr= &segPtr.p->theData[ segmentOffset ];
4340 }
4341 }
4342
4343 inline
4344 bool
is_same_trans(Uint32 opId,Uint32 trid1,Uint32 trid2)4345 Dblqh::is_same_trans(Uint32 opId, Uint32 trid1, Uint32 trid2)
4346 {
4347 TcConnectionrecPtr regTcPtr;
4348 regTcPtr.i= opId;
4349 ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
4350 return ((regTcPtr.p->transid[0] == trid1) &&
4351 (regTcPtr.p->transid[1] == trid2));
4352 }
4353
4354 inline
4355 void
get_op_info(Uint32 opId,Uint32 * hash,Uint32 * gci_hi,Uint32 * gci_lo,Uint32 * transId1,Uint32 * transId2)4356 Dblqh::get_op_info(Uint32 opId, Uint32 *hash, Uint32* gci_hi, Uint32* gci_lo,
4357 Uint32* transId1, Uint32* transId2)
4358 {
4359 TcConnectionrecPtr regTcPtr;
4360 regTcPtr.i= opId;
4361 ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
4362 *hash = regTcPtr.p->hashValue;
4363 *gci_hi = regTcPtr.p->gci_hi;
4364 *gci_lo = regTcPtr.p->gci_lo;
4365 *transId1 = regTcPtr.p->transid[0];
4366 *transId2 = regTcPtr.p->transid[1];
4367 }
4368
4369 inline
4370 void
accminupdate(Signal * signal,Uint32 opId,const Local_key * key)4371 Dblqh::accminupdate(Signal* signal, Uint32 opId, const Local_key* key)
4372 {
4373 TcConnectionrecPtr regTcPtr;
4374 regTcPtr.i= opId;
4375 ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
4376 c_acc->execACCMINUPDATE(signal,
4377 regTcPtr.p->accConnectrec,
4378 regTcPtr.p->accConnectPtrP,
4379 key->m_page_no,
4380 key->m_page_idx);
4381
4382 if (ERROR_INSERTED(5714))
4383 {
4384 FragrecordPtr regFragptr;
4385 regFragptr.i = regTcPtr.p->fragmentptr;
4386 c_fragment_pool.getPtr(regFragptr);
4387 if (regFragptr.p->m_copy_started_state == Fragrecord::AC_NR_COPY)
4388 ndbout << " LK: " << *key;
4389 }
4390
4391 if (ERROR_INSERTED(5712) || ERROR_INSERTED(5713))
4392 ndbout << " LK: " << *key;
4393 regTcPtr.p->m_row_id = *key;
4394 }
4395
4396 inline
4397 void
accremoverow(Signal * signal,Uint32 opId,const Local_key * key)4398 Dblqh::accremoverow(Signal* signal, Uint32 opId, const Local_key* key)
4399 {
4400 TcConnectionrecPtr regTcPtr;
4401 regTcPtr.i= opId;
4402 ndbrequire(tcConnect_pool.getValidPtr(regTcPtr));
4403 c_acc->removerow(regTcPtr.p->accConnectrec, key);
4404 }
4405
4406 inline
4407 bool
TRACE_OP_CHECK(const TcConnectionrec * regTcPtr)4408 Dblqh::TRACE_OP_CHECK(const TcConnectionrec* regTcPtr)
4409 {
4410 if (ERROR_INSERTED(5714))
4411 {
4412 FragrecordPtr regFragptr;
4413 regFragptr.i = regTcPtr->fragmentptr;
4414 c_fragment_pool.getPtr(regFragptr);
4415 return regFragptr.p->m_copy_started_state == Fragrecord::AC_NR_COPY;
4416 }
4417
4418 return (ERROR_INSERTED(5712) &&
4419 (regTcPtr->operation == ZINSERT ||
4420 regTcPtr->operation == ZDELETE)) ||
4421 ERROR_INSERTED(5713);
4422 }
4423
4424 inline
is_scan_ok(ScanRecord * scanPtrP,Fragrecord::FragStatus fragstatus)4425 bool Dblqh::is_scan_ok(ScanRecord* scanPtrP, Fragrecord::FragStatus fragstatus)
4426 {
4427 if (likely(fragstatus == Fragrecord::FSACTIVE))
4428 {
4429 return true;
4430 }
4431 else if (likely(fragstatus == Fragrecord::ACTIVE_CREATION &&
4432 scanPtrP->lcpScan))
4433 {
4434 return true;
4435 }
4436 return false;
4437 }
4438
4439 inline
is_lcp_idle(LcpRecord * lcpPtrP)4440 bool Dblqh::is_lcp_idle(LcpRecord *lcpPtrP)
4441 {
4442 if (lcpPtrP->lcpPrepareState == LcpRecord::LCP_IDLE &&
4443 lcpPtrP->lcpRunState == LcpRecord::LCP_IDLE)
4444 return true;
4445 return false;
4446 }
4447 #endif
4448
4449 #undef JAM_FILE_ID
4450
4451 #endif
4452