/*
Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.

This program is also distributed with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation. The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have included with MySQL.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License, version 2.0, for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
24
25 #ifndef DBTC_H
26 #define DBTC_H
27
28 #ifndef DBTC_STATE_EXTRACT
29 #include <ndb_limits.h>
30 #include <pc.hpp>
31 #include <SimulatedBlock.hpp>
32 #include <RWPool.hpp>
33 #include <DLHashTable.hpp>
34 #include <IntrusiveList.hpp>
35 #include <DataBuffer.hpp>
36 #include <Bitmask.hpp>
37 #include <AttributeList.hpp>
38 #include <signaldata/DihScanTab.hpp>
39 #include <signaldata/AttrInfo.hpp>
40 #include <signaldata/LqhTransConf.hpp>
41 #include <signaldata/LqhKey.hpp>
42 #include <signaldata/TrigAttrInfo.hpp>
43 #include <signaldata/TcIndx.hpp>
44 #include <signaldata/TransIdAI.hpp>
45 #include <signaldata/EventReport.hpp>
46 #include <trigger_definitions.h>
47 #include <SignalCounter.hpp>
48 #include <KeyTable.hpp>
49 #include <portlib/NdbTick.h>
50 #include "TransientPool.hpp"
51 #include "TransientSlotPool.hpp"
52 #endif
53
54
// File id used by the jam() trace macros for this block.
#define JAM_FILE_ID 350

// Histogram configuration for time-tracking statistics.
#define TIME_TRACK_HISTOGRAM_RANGES 32
#define TIME_TRACK_LOG_HISTOGRAM_RANGES 5
#define TIME_TRACK_INITIAL_RANGE_VALUE 50

#ifdef DBTC_C
/*
 * 2.2 LOCAL SYMBOLS
 * -----------------
 */
#define Z8NIL 255
#define ZCLOSED 2
#define ZCOMMITING 0 /* VALUE FOR TRANSTATUS */
#define ZCOMMIT_SETUP 2
#define ZCONTINUE_ABORT_080 4
#define ZGCP_FILESIZE 10
#define ZINTSPH1 1
#define ZINTSPH2 2
#define ZINTSPH3 3
#define ZINTSPH6 6
#define ZLASTPHASE 255
#define ZNODEBUF_FILESIZE 2000
#define ZNR_OF_SEIZE 10
#define ZSCANREC_FILE_SIZE 100
#define ZSCAN_OPREC_FILE_SIZE 400
#define ZSPH1 1
#define ZTABREC_FILESIZE 16
#define ZTAKE_OVER_ACTIVE 1
#define ZTAKE_OVER_IDLE 0
#define ZTC_CONNECT_FILESIZE 200
#define ZTCOPCONF_SIZE 6

// ----------------------------------------
// Error Codes for Scan
// ----------------------------------------
#define ZNO_CONCURRENCY_ERROR 242
#define ZTOO_HIGH_CONCURRENCY_ERROR 244
#define ZNO_SCANREC_ERROR 245
#define ZNO_FRAGMENT_ERROR 246
#define ZSCAN_AI_LEN_ERROR 269
#define ZSCAN_LQH_ERROR 270
#define ZSCAN_FRAG_LQH_ERROR 274

#define ZSCANTIME_OUT_ERROR 296
#define ZSCANTIME_OUT_ERROR2 297

// ----------------------------------------
// Error Codes for transactions
// ----------------------------------------
#define ZSTATE_ERROR 202
#define ZLENGTH_ERROR 207 // Also Scan
#define ZERO_KEYLEN_ERROR 208
#define ZSIGNAL_ERROR 209
#define ZGET_ATTRBUF_ERROR 217 // Also Scan
#define ZGET_DATAREC_ERROR 218
#define ZMORE_AI_IN_TCKEYREQ_ERROR 220
#define ZTOO_MANY_FIRED_TRIGGERS 221
#define ZCOMMITINPROGRESS 230
#define ZROLLBACKNOTALLOWED 232
#define ZNO_FREE_TC_CONNECTION 233 // Also Scan
#define ZABORTINPROGRESS 237
#define ZPREPAREINPROGRESS 238
#define ZWRONG_SCHEMA_VERSION_ERROR 241 // Also Scan
#define ZSCAN_NODE_ERROR 250
#define ZNO_FRAG_LOCATION_RECORD_ERROR 251
#define ZTRANS_STATUS_ERROR 253
#define ZTIME_OUT_ERROR 266
#define ZSIMPLE_READ_WITHOUT_AI 271
#define ZNO_AI_WITH_UPDATE 272
#define ZSEIZE_API_COPY_ERROR 275
#define ZSCANINPROGRESS 276
#define ZABORT_ERROR 277
#define ZCOMMIT_TYPE_ERROR 278

#define ZNO_FREE_TC_MARKER 279
#define ZNO_FREE_TC_MARKER_DATABUFFER 273
#define ZNODE_SHUTDOWN_IN_PROGRESS 280
#define ZCLUSTER_SHUTDOWN_IN_PROGRESS 281
#define ZWRONG_STATE 282
#define ZINCONSISTENT_TRIGGER_STATE 293
#define ZCLUSTER_IN_SINGLEUSER_MODE 299

#define ZDROP_TABLE_IN_PROGRESS 283
#define ZNO_SUCH_TABLE 284
#define ZUNKNOWN_TABLE_ERROR 285
#define ZNODEFAIL_BEFORE_COMMIT 286
#define ZINDEX_CORRUPT_ERROR 287
#define ZSCAN_FRAGREC_ERROR 291
#define ZMISSING_TRIGGER_DATA 240
#define ZINCONSISTENT_INDEX_USE 4349

// ----------------------------------------
// Seize error
// ----------------------------------------
#define ZNO_FREE_API_CONNECTION 219
#define ZSYSTEM_NOT_STARTED_ERROR 203

// ----------------------------------------
// Release errors
// ----------------------------------------
#define ZINVALID_CONNECTION 229


#define ZNOT_FOUND 626
#define ZALREADYEXIST 630
#define ZNOTUNIQUE 893
#define ZFK_NO_PARENT_ROW_EXISTS 255
#define ZFK_CHILD_ROW_EXISTS 256

#define ZINVALID_KEY 290
#define ZUNLOCKED_IVAL_TOO_HIGH 294
#define ZUNLOCKED_OP_HAS_BAD_STATE 295
#define ZBAD_DIST_KEY 298
#define ZTRANS_TOO_BIG 261
#endif
171
172 class Dbtc
173 #ifndef DBTC_STATE_EXTRACT
174 : public SimulatedBlock
175 #endif
176 {
177 public:
178
179 #ifndef DBTC_STATE_EXTRACT
180 /**
181 * Incase of mt-TC...only one instance will perform actual take-over
182 * let this be TAKE_OVER_INSTANCE
183 */
184 STATIC_CONST( TAKE_OVER_INSTANCE = 1 );
185 #endif
186
/**
 * State of one API connection (transaction) as seen by TC.
 * Numeric values are explicit because they appear in traces/dumps
 * and must stay stable.
 */
enum ConnectionState {
  CS_CONNECTED = 0,
  CS_DISCONNECTED = 1,
  CS_STARTED = 2,
  CS_RECEIVING = 3,
  CS_RESTART = 7,
  CS_ABORTING = 8,
  CS_COMPLETING = 9,
  CS_COMPLETE_SENT = 10,
  CS_PREPARE_TO_COMMIT = 11,
  CS_COMMIT_SENT = 12,
  CS_START_COMMITTING = 13,
  CS_COMMITTING = 14,
  CS_REC_COMMITTING = 15,
  CS_WAIT_ABORT_CONF = 16,
  CS_WAIT_COMPLETE_CONF = 17,
  CS_WAIT_COMMIT_CONF = 18,
  CS_FAIL_ABORTING = 19,
  CS_FAIL_ABORTED = 20,
  CS_FAIL_PREPARED = 21,
  CS_FAIL_COMMITTING = 22,
  CS_FAIL_COMMITTED = 23,
  CS_FAIL_COMPLETED = 24,
  CS_START_SCAN = 25,

  /**
   * Sending FIRE_TRIG_REQ
   */
  CS_SEND_FIRE_TRIG_REQ = 26,

  /**
   * Waiting for FIRE_TRIG_CONF/REF (or operations generated by this)
   */
  CS_WAIT_FIRE_TRIG_REQ = 27
};
222
223 #ifndef DBTC_STATE_EXTRACT
/**
 * State of a single operation (TC connect record).
 * Gaps in the numbering are intentional: values are stable identifiers
 * used in traces and must not be renumbered.
 */
enum OperationState {
  OS_CONNECTED = 1,
  OS_OPERATING = 2,
  OS_PREPARED = 3,
  OS_COMMITTING = 4,
  OS_COMMITTED = 5,
  OS_COMPLETING = 6,
  OS_COMPLETED = 7,

  OS_ABORTING = 9,
  OS_ABORT_SENT = 10,
  OS_TAKE_OVER = 11,
  OS_WAIT_DIH = 12,
  OS_WAIT_KEYINFO = 13,
  OS_WAIT_ATTR = 14,
  OS_WAIT_COMMIT_CONF = 15,
  OS_WAIT_ABORT_CONF = 16,
  OS_WAIT_COMPLETE_CONF = 17,

  OS_FIRE_TRIG_REQ = 19,
};

/** Whether an abort is in progress for a transaction. */
enum AbortState {
  AS_IDLE = 0,
  AS_ACTIVE = 1
};

/** Liveness of a data node as tracked by TC. */
enum HostState {
  HS_ALIVE = 0,
  HS_DEAD = 1
};

/** State of LQH transaction take-over handling. */
enum LqhTransState {
  LTS_IDLE = 0,
  LTS_ACTIVE = 1
};

/** Node-failure handling progress for a failed TC instance. */
enum FailState {
  FS_IDLE = 0,
  FS_LISTENING = 1,
  FS_COMPLETING = 2
};

/** Whether the system has completed start. */
enum SystemStartState {
  SSS_TRUE = 0,
  SSS_FALSE = 1
};

/** Whether the time-out scan is active. */
enum TimeOutCheckState {
  TOCS_TRUE = 0,
  TOCS_FALSE = 1
};

/** Which confirmation signal (if any) to return to the API. */
enum ReturnSignal {
  RS_NO_RETURN = 0,
  RS_TCKEYCONF = 1,
  RS_TC_COMMITCONF = 3,
  RS_TCROLLBACKCONF = 4,
  RS_TCROLLBACKREP = 5
};

/** Progress of an index operation (index lookup + base-table access). */
enum IndexOperationState {
  IOS_NOOP = 0,
  IOS_INDEX_ACCESS = 1,
  IOS_INDEX_ACCESS_WAIT_FOR_TCKEYCONF = 2,
  IOS_INDEX_ACCESS_WAIT_FOR_TRANSID_AI = 3
};

enum IndexState {
  IS_BUILDING = 0, // build in progress, start state at create
  IS_ONLINE = 1,   // ready to use
  IS_OFFLINE = 2   // not in use
};

/* Sub states of IndexOperation while waiting for TransId_AI
 * from index table lookup
 */
enum IndexTransIdAIState {
  ITAS_WAIT_HEADER = 0,   // Initial state
  ITAS_WAIT_FRAGID = 1,   // Waiting for fragment id word
  ITAS_WAIT_KEY = 2,      // Waiting for (more) key information
  ITAS_ALL_RECEIVED = 3,  // All TransIdAI info received
  ITAS_WAIT_KEY_FAIL = 4  // Failed collecting key
};
308
309 /**--------------------------------------------------------------------------
310 * LOCAL SYMBOLS PER 'SYMBOL-VALUED' VARIABLE
311 *
312 *
313 * NSYMB ZTC_CONNECT_FILESIZE = 200
314 * NSYMB ZHOST_FILESIZE = 16
315 * NSYMB ZDATABUF_FILESIZE = 4000
316 * NSYMB ZATTRBUF_FILESIZE = 4000
317 * NSYMB ZGCP_FILESIZE = 10
318 *
319 *
320 * ABORTED CODES
321 * TPHASE NSYMB ZSPH1 = 1
322 * NSYMB ZLASTPHASE = 255
323 *
324 *
325 * LQH_TRANS
326 * NSYMB ZTRANS_ABORTED = 1
327 * NSYMB ZTRANS_PREPARED = 2
328 * NSYMB ZTRANS_COMMITTED = 3
329 * NSYMB ZCOMPLETED_LQH_TRANS = 4
330 * NSYMB ZTRANS_COMPLETED = 5
331 *
332 *
333 * TAKE OVER
334 * NSYMB ZTAKE_OVER_IDLE = 0
335 * NSYMB ZTAKE_OVER_ACTIVE = 1
336 *
337 * ATTRBUF (ATTRBUF_RECORD)
338 * NSYMB ZINBUF_DATA_LEN = 24
339 * NSYMB ZINBUF_NEXTFREE = 25 (NOT USED )
340 * NSYMB ZINBUF_PREV = 26
341 * NSYMB ZINBUF_NEXT = 27
342 -------------------------------------------------------------------------*/
343 /*
344 2.3 RECORDS AND FILESIZES
345 -------------------------
346 */
347 typedef DataBufferSegment<11, RT_DBTC_ATTRIBUTE_BUFFER> AttributeBufferSegment;
348 typedef TransientPool<AttributeBufferSegment> AttributeBuffer_pool;
349 STATIC_CONST(DBTC_ATTRIBUTE_BUFFER_TRANSIENT_POOL_INDEX = 0);
350 typedef DataBuffer<11, AttributeBuffer_pool, RT_DBTC_ATTRIBUTE_BUFFER>
351 AttributeBuffer;
352 typedef LocalDataBuffer<11, AttributeBuffer_pool, RT_DBTC_ATTRIBUTE_BUFFER>
353 LocalAttributeBuffer;
354
355 /* **************************************************************** */
356 /* ---------------------------------------------------------------- */
357 /* ------------------- TRIGGER AND INDEX DATA --------------------- */
358 /* ---------------------------------------------------------------- */
359 /* **************************************************************** */
360 /* ********* DEFINED TRIGGER DATA ********* */
361 /* THIS RECORD FORMS LISTS OF ACTIVE */
362 /* TRIGGERS FOR EACH TABLE. */
363 /* THE RECORDS ARE MANAGED BY A TRIGGER */
364 /* POOL WHERE A TRIGGER RECORD IS SEIZED */
365 /* WHEN A TRIGGER IS ACTIVATED AND RELEASED */
366 /* WHEN THE TRIGGER IS DEACTIVATED. */
367 /* **************************************** */
368 struct TcDefinedTriggerData {
TcDefinedTriggerDataDbtc::TcDefinedTriggerData369 TcDefinedTriggerData() {}
370 /**
371 * Trigger id, used to identify the trigger
372 */
373 UintR triggerId;
374
375 Uint32 refCount;
376
377 /**
378 * Trigger type, defines what the trigger is used for
379 */
380 TriggerType::Value triggerType;
381
382 /**
383 * Trigger type, defines what the trigger is used for
384 */
385 TriggerEvent::Value triggerEvent;
386
387 /**
388 * Next ptr (used in pool/list)
389 */
390 union {
391 Uint32 nextPool;
392 Uint32 nextList;
393 };
394
395 /**
396 * Index id, only used by secondary_index triggers. This is same as
397 * index table id in DICT.
398 **/
399 union {
400 Uint32 indexId; // For unique index trigger
401 Uint32 tableId; // For reorg trigger
402 Uint32 fkId; // For FK trigger
403 };
404
405 /**
406 * Prev pointer (used in list)
407 */
408 Uint32 prevList;
409
410 Uint32 oldTriggerIds[2]; // For upgrade :(
411
printDbtc::TcDefinedTriggerData412 inline void print(NdbOut & s) const {
413 s << "[DefinedTriggerData = " << triggerId << "]";
414 }
415 };
416 typedef Ptr<TcDefinedTriggerData> DefinedTriggerPtr;
417 typedef ArrayPool<TcDefinedTriggerData> TcDefinedTriggerData_pool;
418 typedef DLList<TcDefinedTriggerData_pool> TcDefinedTriggerData_list;
419
420 /**
421 * Pool of trigger data record
422 */
423 TcDefinedTriggerData_pool c_theDefinedTriggerPool;
424 RSS_AP_SNAPSHOT(c_theDefinedTriggerPool);
425
426 /**
427 * The list of active triggers
428 */
429 TcDefinedTriggerData_list c_theDefinedTriggers;
430
431 alignas(64) AttributeBuffer_pool c_theAttributeBufferPool;
432 RSS_AP_SNAPSHOT(c_theAttributeBufferPool);
433
434 typedef DataBufferSegment<5, RT_DBTC_COMMIT_ACK_MARKER_BUFFER>
435 CommitAckMarkerSegment;
436 typedef TransientPool<CommitAckMarkerSegment> CommitAckMarkerBuffer_pool;
437 STATIC_CONST(DBTC_COMMIT_ACK_MARKER_BUFFER_TRANSIENT_POOL_INDEX = 1);
438 typedef DataBuffer<5,
439 CommitAckMarkerBuffer_pool,
440 RT_DBTC_COMMIT_ACK_MARKER_BUFFER> CommitAckMarkerBuffer;
441 typedef LocalDataBuffer<5,
442 CommitAckMarkerBuffer_pool,
443 RT_DBTC_COMMIT_ACK_MARKER_BUFFER>
444 LocalCommitAckMarkerBuffer;
445
446 alignas(64) CommitAckMarkerBuffer_pool c_theCommitAckMarkerBufferPool;
447 RSS_AP_SNAPSHOT(c_theCommitAckMarkerBufferPool);
448
449 /* ********** FIRED TRIGGER DATA ********** */
450 /* THIS RECORD FORMS LISTS OF FIRED */
451 /* TRIGGERS FOR A TRANSACTION. */
452 /* THE RECORDS ARE MANAGED BY A TRIGGER */
453 /* POOL WHERE A TRIGGER RECORD IS SEIZED */
454 /* WHEN A TRIGGER IS ACTIVATED AND RELEASED */
455 /* WHEN THE TRIGGER IS DEACTIVATED. */
456 /* **************************************** */
457 struct TcFiredTriggerData
458 {
459 STATIC_CONST( TYPE_ID = RT_DBTC_FIRED_TRIGGER_DATA );
460
TcFiredTriggerDataDbtc::TcFiredTriggerData461 TcFiredTriggerData()
462 : m_magic(Magic::make(TYPE_ID))
463 {}
464
465 Uint32 m_magic;
466
467 /**
468 * Trigger id, used to identify the trigger
469 **/
470 Uint32 triggerId;
471
472 /**
473 * The operation that fired the trigger
474 */
475 Uint32 fireingOperation;
476
477 /**
478 * The fragment id of the firing operation. This will be appended
479 * to the Primary Key such that the record can be found even in the
480 * case of user defined partitioning.
481 */
482 Uint32 fragId;
483
484 /**
485 * Used for scrapping in case of node failure
486 */
487 NodeId nodeId;
488
489 /**
490 * Trigger type, defines what the trigger is used for
491 */
492 TriggerType::Value triggerType;
493
494 /**
495 * Trigger type, defines what the trigger is used for
496 */
497 TriggerEvent::Value triggerEvent;
498
499 /**
500 * Trigger attribute info, primary key value(s)
501 */
502 AttributeBuffer::Head keyValues;
503
504 /**
505 * Trigger attribute info, attribute value(s) before operation
506 */
507 AttributeBuffer::Head beforeValues;
508
509 /**
510 * Trigger attribute info, attribute value(s) after operation
511 */
512 AttributeBuffer::Head afterValues;
513
514 /**
515 * Next ptr (used in list/hash)
516 */
517 union
518 {
519 Uint32 nextList;
520 Uint32 nextHash;
521 };
522
523 /**
524 * Prev pointer (used in list)
525 */
526 union {
527 Uint32 prevList;
528 Uint32 prevHash;
529 };
530
printDbtc::TcFiredTriggerData531 inline void print(NdbOut & s) const {
532 s << "[FiredTriggerData = " << triggerId << "]";
533 }
534
hashValueDbtc::TcFiredTriggerData535 inline Uint32 hashValue() const {
536 return fireingOperation ^ nodeId;
537 }
538
equalDbtc::TcFiredTriggerData539 inline bool equal(const TcFiredTriggerData & rec) const {
540 return fireingOperation == rec.fireingOperation && nodeId == rec.nodeId;
541 }
542 };
543 typedef Ptr<TcFiredTriggerData> FiredTriggerPtr;
544 typedef TransientPool<TcFiredTriggerData> TcFiredTriggerData_pool;
545 STATIC_CONST(DBTC_FIRED_TRIGGER_DATA_TRANSIENT_POOL_INDEX = 2);
546 typedef LocalDLFifoList<TcFiredTriggerData_pool> Local_TcFiredTriggerData_fifo;
547 typedef DLHashTable<TcFiredTriggerData_pool> TcFiredTriggerData_hash;
548
549 /**
550 * Pool of trigger data record
551 */
552 alignas(64) TcFiredTriggerData_pool c_theFiredTriggerPool;
553 TcFiredTriggerData_hash c_firedTriggerHash;
554 RSS_AP_SNAPSHOT(c_theFiredTriggerPool);
555
556 Uint32 c_maxNumberOfDefinedTriggers;
557
558 // Max number of outstanding FireTrigRequests per transaction
559 static const Uint32 MaxOutstandingFireTrigReqPerTrans = 32;
560
561 struct AttrInfoRecord {
562 /**
563 * Pre-allocated AttrInfo signal
564 */
565 AttrInfo attrInfo;
566
567 /**
568 * Next ptr (used in pool/list)
569 */
570 union {
571 Uint32 nextPool;
572 Uint32 nextList;
573 };
574 /**
575 * Prev pointer (used in list)
576 */
577 Uint32 prevList;
578 };
579
580
581 /* ************* INDEX DATA *************** */
582 /* THIS RECORD FORMS LISTS OF ACTIVE */
583 /* INDEX FOR EACH TABLE. */
584 /* THE RECORDS ARE MANAGED BY A INDEX */
585 /* POOL WHERE AN INDEX RECORD IS SEIZED */
586 /* WHEN AN INDEX IS CREATED AND RELEASED */
587 /* WHEN THE INDEX IS DROPPED. */
588 /* **************************************** */
589 struct TcIndexData {
TcIndexDataDbtc::TcIndexData590 TcIndexData() :
591 indexState(IS_OFFLINE)
592 {}
593
594 /**
595 * IndexState
596 */
597 IndexState indexState;
598
599 /**
600 * Index id, same as index table id in DICT
601 */
602 Uint32 indexId;
603
604 /**
605 * Index attribute list. Only the length is used in v21x.
606 */
607 IndexAttributeList attributeList;
608
609 /**
610 * Primary table id, the primary table to be indexed
611 */
612 Uint32 primaryTableId;
613
614 /**
615 * Primary key position in secondary table
616 */
617 Uint32 primaryKeyPos;
618
619 /**
620 * Next ptr (used in pool/list)
621 */
622 union {
623 Uint32 nextPool;
624 Uint32 nextList;
625 };
626 /**
627 * Prev pointer (used in list)
628 */
629 Uint32 prevList;
630 };
631
632 typedef Ptr<TcIndexData> TcIndexDataPtr;
633 typedef ArrayPool<TcIndexData> TcIndexData_pool;
634 typedef DLList<TcIndexData_pool> TcIndexData_list;
635
636 /**
637 * Pool of index data record
638 */
639 TcIndexData_pool c_theIndexPool;
640 RSS_AP_SNAPSHOT(c_theIndexPool);
641
642 /**
643 * The list of defined indexes
644 */
645 TcIndexData_list c_theIndexes;
646 UintR c_maxNumberOfIndexes;
647
648 struct TcIndexOperation {
649 STATIC_CONST( TYPE_ID = RT_DBTC_INDEX_OPERATION );
650
TcIndexOperationDbtc::TcIndexOperation651 TcIndexOperation() :
652 m_magic(Magic::make(TYPE_ID)),
653 indexOpState(IOS_NOOP),
654 pendingKeyInfo(0),
655 keyInfoSectionIVal(RNIL),
656 pendingAttrInfo(0),
657 attrInfoSectionIVal(RNIL),
658 transIdAIState(ITAS_WAIT_HEADER),
659 pendingTransIdAI(0),
660 transIdAISectionIVal(RNIL),
661 indexReadTcConnect(RNIL),
662 savedFlags(0)
663 {}
664
~TcIndexOperationDbtc::TcIndexOperation665 ~TcIndexOperation()
666 {
667 }
668
669 Uint32 m_magic;
670
671 // Index data
672 Uint32 indexOpId;
673 IndexOperationState indexOpState; // Used to mark on-going TcKeyReq
674 Uint32 pendingKeyInfo;
675 Uint32 keyInfoSectionIVal;
676 Uint32 pendingAttrInfo;
677 Uint32 attrInfoSectionIVal;
678 IndexTransIdAIState transIdAIState;
679 Uint32 pendingTransIdAI;
680 Uint32 transIdAISectionIVal; // For accumulating TransId_AI
681 Uint32 fragmentId;
682
683 TcKeyReq tcIndxReq;
684 UintR connectionIndex;
685 UintR indexReadTcConnect; //
686
687 Uint32 savedFlags; // Saved transaction flags
688
689 /**
690 * Next ptr (used in list)
691 */
692 Uint32 nextList;
693 /**
694 * Prev pointer (used in list)
695 */
696 Uint32 prevList;
697 };
698
699 typedef Ptr<TcIndexOperation> TcIndexOperationPtr;
700 typedef TransientPool<TcIndexOperation> TcIndexOperation_pool;
701 STATIC_CONST(DBTC_INDEX_OPERATION_TRANSIENT_POOL_INDEX = 3);
702 typedef LocalDLList<TcIndexOperation_pool> LocalTcIndexOperation_dllist;
703
704 /**
705 * Pool of index data record
706 */
707 alignas(64) TcIndexOperation_pool c_theIndexOperationPool;
708 RSS_AP_SNAPSHOT(c_theIndexOperationPool);
709
710 struct TcFKData {
TcFKDataDbtc::TcFKData711 TcFKData() {}
712
713 Uint32 m_magic;
714
715 union {
716 Uint32 key;
717 Uint32 fkId;
718 };
719
720 /**
721 * Columns used in parent table
722 */
723 IndexAttributeList parentTableColumns;
724
725 /**
726 * Columns used in child table
727 */
728 IndexAttributeList childTableColumns;
729
730 Uint32 parentTableId; // could be unique index table...
731 Uint32 childTableId; //
732 Uint32 childIndexId; // (could be tableId too)
733 Uint32 bits; // CreateFKImplReq::Bits
734
735 Uint32 nextPool;
736 Uint32 nextHash;
737 Uint32 prevHash;
738
hashValueDbtc::TcFKData739 Uint32 hashValue() const {
740 return key;
741 }
742
equalDbtc::TcFKData743 bool equal(const TcFKData& obj) const {
744 return key == obj.key;
745 }
746 };
747
748 typedef RecordPool<RWPool<TcFKData> > FK_pool;
749 typedef KeyTable<FK_pool> FK_hash;
750
751 FK_pool c_fk_pool;
752 FK_hash c_fk_hash;
753
754 /************************** TC CONNECT RECORD ************************/
755 /* *******************************************************************/
756 /* TC CONNECT RECORD KEEPS ALL INFORMATION TO CARRY OUT A TRANSACTION*/
757 /* THE TRANSACTION CONTROLLER ESTABLISHES CONNECTIONS TO DIFFERENT */
758 /* BLOCKS TO CARRY OUT THE TRANSACTION. THERE CAN BE SEVERAL RECORDS */
759 /* PER ACTIVE TRANSACTION. THE TC CONNECT RECORD COOPERATES WITH THE */
760 /* API CONNECT RECORD FOR COMMUNICATION WITH THE API AND WITH THE */
761 /* LQH CONNECT RECORD FOR COMMUNICATION WITH THE LQH'S INVOLVED IN */
762 /* THE TRANSACTION. TC CONNECT RECORD IS PERMANENTLY CONNECTED TO A */
763 /* RECORD IN DICT AND ONE IN DIH. IT CONTAINS A LIST OF ACTIVE LQH */
764 /* CONNECT RECORDS AND A LIST OF STARTED BUT NOT ACTIVE LQH CONNECT */
765 /* RECORDS. IT DOES ALSO CONTAIN A LIST OF ALL OPERATIONS THAT ARE */
766 /* EXECUTED WITH THE TC CONNECT RECORD. */
767 /*******************************************************************>*/
768 /* TC_CONNECT RECORD ALIGNED TO BE 128 BYTES */
769 /*******************************************************************>*/
770 struct TcConnectRecord
771 {
772 STATIC_CONST( TYPE_ID = RT_DBTC_CONNECT_RECORD );
773
TcConnectRecordDbtc::TcConnectRecord774 TcConnectRecord()
775 : m_magic(Magic::make(TYPE_ID)),
776 commitAckMarker(RNIL),
777 tcConnectstate(OS_CONNECTED),
778 apiConnect(RNIL),
779 nextList(RNIL),
780 noOfNodes(0),
781 m_overtakeable_operation(0),
782 m_special_op_flags(0),
783 prevList(RNIL),
784 triggeringOperation(RNIL),
785 indexOp(RNIL),
786 currentTriggerId(RNIL)
787 {
788 NdbTick_Invalidate(&m_start_ticks);
789 thePendingTriggers.init();
790 }
791
792 Uint32 m_magic;
793 //---------------------------------------------------
794 // First 16 byte cache line. Those variables are only
795 // used in error cases.
796 //---------------------------------------------------
797 UintR tcOprec; /* TC OPREC of operation being taken over */
798 Uint16 failData[4]; /* Failed nodes when taking over an operation */
799 UintR nextTcFailHash;
800
801 //---------------------------------------------------
802 // Second 16 byte cache line. Those variables are used
803 // from LQHKEYCONF to sending COMMIT and COMPLETED.
804 //---------------------------------------------------
805 UintR lastLqhCon; /* Connect record in last replicas Lqh record */
806 Uint16 lastLqhNodeId; /* Node id of last replicas Lqh */
807 Uint16 m_execAbortOption;/* TcKeyReq::ExecuteAbortOption */
808 UintR commitAckMarker; /* CommitMarker I value */
809
810 //---------------------------------------------------
811 // Third 16 byte cache line. The hottest variables.
812 //---------------------------------------------------
813 OperationState tcConnectstate; /* THE STATE OF THE CONNECT*/
814 UintR apiConnect; /* POINTER TO API CONNECT RECORD */
815 UintR nextList; /* NEXT TC RECORD*/
816 Uint8 dirtyOp;
817 Uint8 opSimple;
818 Uint8 lastReplicaNo; /* NUMBER OF THE LAST REPLICA IN THE OPERATION */
819 Uint8 noOfNodes; /* TOTAL NUMBER OF NODES IN OPERATION */
820 Uint8 operation; /* OPERATION TYPE */
821 /* 0 = READ REQUEST */
822 /* 1 = UPDATE REQUEST */
823 /* 2 = INSERT REQUEST */
824 /* 3 = DELETE REQUEST */
825 Uint8 m_overtakeable_operation;
826 Uint16 m_special_op_flags; // See ApiConnectRecord::SpecialOpFlags
827 enum SpecialOpFlags {
828 SOF_NORMAL = 0,
829 SOF_INDEX_TABLE_READ = 1, // Read index table
830 SOF_REORG_TRIGGER = 4, // A reorg trigger
831 SOF_REORG_MOVING = 8, // A record that should be moved
832 SOF_TRIGGER = 16, // A trigger
833 SOF_REORG_COPY = 32,
834 SOF_REORG_DELETE = 64,
835 SOF_DEFERRED_UK_TRIGGER = 128, // Op has deferred trigger
836 SOF_DEFERRED_FK_TRIGGER = 256,
837 SOF_FK_READ_COMMITTED = 512, // reply to TC even for dirty read
838 SOF_FULLY_REPLICATED_TRIGGER = 1024,
839 SOF_UTIL_FLAG = 2048 // Sender to TC is DBUTIL (higher prio)
840 };
841
isIndexOpDbtc::TcConnectRecord842 static inline bool isIndexOp(Uint16 flags) {
843 return (flags & SOF_INDEX_TABLE_READ) != 0;
844 }
845
846 //---------------------------------------------------
847 // Fourth 16 byte cache line. The mildly hot variables.
848 // tcNodedata expands 4 Bytes into the next cache line
849 // with indexes almost never used.
850 //---------------------------------------------------
851 UintR clientData; /* SENDERS OPERATION POINTER */
852 UintR prevList; /* DOUBLY LINKED LIST OF TC CONNECT RECORDS*/
853 UintR savePointId;
854
855 Uint16 tcNodedata[4];
856 /* Instance key to send to LQH. Receiver maps it to actual instance. */
857 Uint16 lqhInstanceKey;
858
859 // Trigger data
860 UintR numFiredTriggers; // As reported by lqhKeyConf
861 UintR numReceivedTriggers; // FIRE_TRIG_ORD
862 UintR triggerExecutionCount;// No of outstanding op due to triggers
863 UintR savedState[LqhKeyConf::SignalLength];
864 /**
865 * The list of pending fired triggers
866 */
867 Local_TcFiredTriggerData_fifo::Head thePendingTriggers;
868
869 UintR triggeringOperation; // Which operation was "cause" of this op
870
871 // Index data
872 UintR indexOp;
873 UintR currentTriggerId;
874 union {
875 Uint32 attrInfoLen;
876 Uint32 triggerErrorCode;
877 };
878 NDB_TICKS m_start_ticks;
879 };
880
881 typedef Ptr<TcConnectRecord> TcConnectRecordPtr;
882 typedef TransientPool<TcConnectRecord> TcConnectRecord_pool;
883 STATIC_CONST(DBTC_CONNECT_RECORD_TRANSIENT_POOL_INDEX = 4);
884 typedef LocalDLFifoList<TcConnectRecord_pool> LocalTcConnectRecord_fifo;
885
886 /************************** API CONNECT RECORD ***********************
887 * The API connect record contains the connection record to which the
888 * application connects.
889 *
890 * The application can send one operation at a time. It can send a
891 * new operation immediately after sending the previous operation.
892 * Thereby several operations can be active in one transaction within TC.
893 * This is achieved by using the API connect record.
894 * Each active operation is handled by the TC connect record.
895 * As soon as the TC connect record has sent the
896 * request to the LQH it is ready to receive new operations.
897 * The LQH connect record takes care of waiting for an operation to
898 * complete.
899 * When an operation has completed on the LQH connect record,
900 * a new operation can be started on this LQH connect record.
901 *******************************************************************
902 *
903 * API CONNECT RECORD ALIGNED TO BE 256 BYTES
904 ********************************************************************/
905
906 /*******************************************************************>*/
907 // We break out the API Timer for optimisation on scanning rather than
908 // on fast access.
909 /*******************************************************************>*/
910 struct ApiConTimers
911 {
912 STATIC_CONST( TYPE_ID = RT_DBTC_API_CONNECT_TIMERS );
913 STATIC_CONST( INDEX_BITS = 3 );
914 STATIC_CONST( INDEX_MASK = (1 << INDEX_BITS) - 1 );
915 STATIC_CONST( INDEX_MAX_COUNT = (1 << INDEX_BITS) - 2 );
916
917 struct TimerEntry
918 {
919 Uint32 m_timer;
920 Uint32 m_apiConnectRecord;
921 };
922
ApiConTimersDbtc::ApiConTimers923 ApiConTimers():
924 m_magic(Magic::make(TYPE_ID)),
925 m_count(0),
926 m_top(0),
927 nextList(RNIL),
928 prevList(RNIL)
929 {}
930
static_assertsDbtc::ApiConTimers931 static void static_asserts()
932 {
933 STATIC_ASSERT(sizeof(ApiConTimers) ==
934 sizeof(TimerEntry) << INDEX_BITS);
935 }
936
937 Uint32 m_magic;
938 Uint16 m_count;
939 Uint16 m_top;
940 Uint32 nextList;
941 Uint32 prevList;
942 TimerEntry m_entries[INDEX_MAX_COUNT];
943 };
944
945 typedef Ptr<ApiConTimers> ApiConTimersPtr;
946 typedef TransientPool<ApiConTimers> ApiConTimers_pool;
947 STATIC_CONST(DBTC_API_CONNECT_TIMERS_TRANSIENT_POOL_INDEX = 5);
948 typedef LocalDLFifoList<ApiConTimers_pool> LocalApiConTimers_list;
949
950 alignas(64) ApiConTimers_pool c_apiConTimersPool;
951 RSS_AP_SNAPSHOT(c_apiConTimersPool);
952 LocalApiConTimers_list::Head c_apiConTimersList;
953 ApiConTimers* c_currentApiConTimers;
954
955 /**
956 * Limit the resource (signal/job buffer) usage of a transaction
957 * by limiting :
958 * - max cascading scans (FK child scans) and
959 * - trigger operations.
960 * An FK child scan is executed alone exclusively.
961 */
962 static const Uint8 MaxCascadingScansPerTransaction = 1;
963 static const Uint32 MaxExecutingTriggerOpsPerTrans = 32;
964
965 struct ApiConnectRecord
966 {
967 STATIC_CONST( TYPE_ID = RT_DBTC_API_CONNECT_RECORD );
968
969 enum ConnectionKind
970 {
971 CK_NONE = 0,
972 CK_FREE,
973 CK_USER,
974 CK_COPY,
975 CK_FAIL
976 };
977
978 ApiConnectRecord();
979
980 Uint32 m_magic;
981 //---------------------------------------------------
982 // First 16 byte cache line. Hot variables.
983 //---------------------------------------------------
984 Uint32 m_apiConTimer;
985 Uint32 m_apiConTimer_line; // Last line updating timer
986 ConnectionState apiConnectstate;
987 ConnectionKind apiConnectkind;
988 UintR transid[2];
989 LocalTcConnectRecord_fifo::Head tcConnect;
990
991 //---------------------------------------------------
992 // Second 16 byte cache line. Hot variables.
993 //---------------------------------------------------
994 UintR lqhkeyconfrec;
995 UintR cachePtr;
996 UintR currSavePointId;
997 UintR counter;
998
999 //---------------------------------------------------
1000 // Third 16 byte cache line. First and second cache
1001 // line plus this will be enough for copy API records.
1002 // Variables used in late phases.
1003 //---------------------------------------------------
1004 UintR nextGcpConnect;
1005 UintR prevGcpConnect;
1006 UintR gcpPointer;
1007 UintR ndbapiConnect;
1008
1009 //---------------------------------------------------
1010 // Fourth 16 byte cache line. Only used in late phases.
1011 // Plus 4 bytes of error handling.
1012 //---------------------------------------------------
1013 UintR nextApiConnect; // free, freeFail, ctransidFailHash, PREPARE_COMMIT
1014 BlockReference ndbapiBlockref;
1015 UintR apiCopyRecord;
1016 Uint64 globalcheckpointid;
1017
1018 //---------------------------------------------------
1019 // Second 64 byte cache line starts. First 16 byte
1020 // cache line in this one. Variables primarily used
1021 // in early phase.
1022 //---------------------------------------------------
1023 UintR lqhkeyreqrec;
1024 union {
1025 Uint32 buddyPtr;
1026 Int32 pendingTriggers; // For deferred triggers
1027 };
1028 union {
1029 UintR apiScanRec;
1030 UintR commitAckMarker;
1031 };
1032
1033 /**
1034 * num_commit_ack_markers
1035 *
1036 * Number of operations sent by this transaction
1037 * to LQH with their CommitAckMarker flag set.
1038 *
1039 * Includes marked operations currently in-progress and
1040 * those which prepared successfully,
1041 * Excludes failed operations (LQHKEYREF)
1042 */
1043 Uint32 num_commit_ack_markers;
1044 Uint32 m_write_count;
1045 ReturnSignal returnsignal;
1046 AbortState abortState;
1047
1048 enum TransactionFlags
1049 {
1050 TF_INDEX_OP_RETURN = 1,
1051 TF_TRIGGER_PENDING = 2, // Used to mark waiting for a CONTINUEB
1052 TF_EXEC_FLAG = 4,
1053 TF_COMMIT_ACK_MARKER_RECEIVED = 8,
1054 TF_DEFERRED_CONSTRAINTS = 16, // check constraints in deferred fashion
1055 TF_DEFERRED_UK_TRIGGERS = 32, // trans has deferred UK triggers
1056 TF_DEFERRED_FK_TRIGGERS = 64, // trans has deferred FK triggers
1057 TF_DISABLE_FK_CONSTRAINTS = 128,
1058 TF_LATE_COMMIT = 256, // Wait sending apiCommit until complete phase done
1059
1060 TF_END = 0
1061 };
1062 Uint32 m_flags;
1063
1064 Uint8 takeOverRec;
1065 Uint8 currentReplicaNo;
1066
1067 Uint8 tckeyrec; // Changed from R
1068
1069 Uint8 tcindxrec;
1070
1071 enum ApiFailStates
1072 {
1073 AFS_API_OK = 0,
1074 AFS_API_FAILED = 1,
1075 AFS_API_DISCONNECTED = 2
1076 };
1077 Uint8 apiFailState;
1078
1079 Uint8 timeOutCounter;
1080 Uint8 singleUserMode;
1081
1082 Uint8 m_pre_commit_pass;
1083
1084 // number of on-going cascading scans (FK child scans) at a transaction.
1085 Uint8 cascading_scans_count;
1086
1087 // Trigger execution loop active
1088 bool m_inExecuteTriggers;
1089
1090 Uint16 m_special_op_flags; // Used to mark on-going TcKeyReq as indx table
1091
1092 Uint16 returncode;
1093 Uint16 takeOverInd;
1094 //---------------------------------------------------
1095 // Error Handling variables. If cache line 32 bytes
1096 // ensures that cache line is still only read in
1097 // early phases.
1098 //---------------------------------------------------
1099 UintR currentTcConnect;
1100 BlockReference tcBlockref;
1101 UintR failureNr;
1102
1103 //---------------------------------------------------
1104 // Second 64 byte cache line. Third 16 byte cache line
1105 // in this one. Variables primarily used in early phase
1106 // and checked in late phase.
1107 // Fourth cache line is the tcSendArray that is used
1108 // when two and three operations are responded to in
1109 // parallel. The first two entries in tcSendArray is
1110 // part of the third cache line.
1111 //---------------------------------------------------
1112 //---------------------------------------------------
1113 // timeOutCounter is used waiting for ABORTCONF, COMMITCONF
1114 // and COMPLETECONF
1115 //---------------------------------------------------
1116 UintR tcSendArray[6];
1117 NdbNodeBitmask m_transaction_nodes;
1118
1119 // Trigger data
1120
1121 /**
1122 * The list of fired triggers
1123 */
1124 Local_TcFiredTriggerData_fifo::Head theFiredTriggers;
1125
1126 // Count the outstanding FIRE_TRIG_REQs of a transaction.
1127 // Limit it in order to avoid job buffer overload
1128 Uint32 m_outstanding_fire_trig_req;
1129
1130 // First and last indices of the local tc connect pointers that will
1131 // be used to send fire trigger reqs when resumed in execFireTrigConf
1132 // or in execCONTINUEB
1133 UintR m_firstTcConnectPtrI_FT;
1134 UintR m_lastTcConnectPtrI_FT;
1135
1136
1137 // Index data
1138
1139 UintR noIndexOp; // No outstanding index ops
1140
1141 // Index op return context
1142 UintR indexOp;
1143 UintR clientData;
1144 Uint32 errorData;
1145 UintR attrInfoLen;
1146 Uint32 immediateTriggerId; // Id of trigger op being fired NOW
1147 Uint32 firedFragId;
1148
1149 #ifdef ERROR_INSERT
1150 Uint32 continueBCount; // ERROR_INSERT 8082
1151 #endif
1152 UintR accumulatingIndexOp;
1153 UintR executingIndexOp;
1154 NDB_TICKS m_start_ticks;
1155 LocalTcIndexOperation_dllist::Head theSeizedIndexOperations;
1156 UintR tcIndxSendArray[6];
1157
isExecutingDeferredTriggersDbtc::ApiConnectRecord1158 bool isExecutingDeferredTriggers() const {
1159 return apiConnectstate == CS_SEND_FIRE_TRIG_REQ ||
1160 apiConnectstate == CS_WAIT_FIRE_TRIG_REQ ;
1161 }
1162
1163 // Number of on-going trigger operations at a transaction
1164 // Limit them in order to avoid the transaction
1165 // overloading node resources (signal/job buffers).
1166 Uint32 m_executing_trigger_ops;
1167
1168 /**
1169 * ExecTriggersGuard
1170 *
1171 * Used to avoid recursive calls of executeTriggers
1172 */
1173 class ExecTriggersGuard
1174 {
1175 ApiConnectRecord* m_recPtr;
1176 public:
ExecTriggersGuard(ApiConnectRecord * recPtr)1177 ExecTriggersGuard(ApiConnectRecord* recPtr)
1178 {
1179 if (recPtr->m_inExecuteTriggers)
1180 {
1181 m_recPtr = NULL;
1182 }
1183 else
1184 {
1185 m_recPtr = recPtr;
1186 m_recPtr->m_inExecuteTriggers = true;
1187 }
1188 }
1189
~ExecTriggersGuard()1190 ~ExecTriggersGuard()
1191 {
1192 if (m_recPtr)
1193 {
1194 assert(m_recPtr->m_inExecuteTriggers == true);
1195 m_recPtr->m_inExecuteTriggers = false;
1196 }
1197 }
1198
canExecNow() const1199 bool canExecNow() const
1200 {
1201 assert(m_recPtr == NULL ||
1202 m_recPtr->m_inExecuteTriggers);
1203 return (m_recPtr != NULL);
1204 }
1205 };
1206 };
1207
1208 typedef TransientPool<ApiConnectRecord> ApiConnectRecord_pool;
1209 STATIC_CONST(DBTC_API_CONNECT_RECORD_TRANSIENT_POOL_INDEX = 6);
1210 typedef LocalDLFifoList<ApiConnectRecord_pool, IA_GcpConnect>
1211 LocalApiConnectRecord_gcp_list;
1212 typedef LocalSLFifoList<ApiConnectRecord_pool, IA_ApiConnect>
1213 LocalApiConnectRecord_api_fifo;
1214 typedef LocalSLList<ApiConnectRecord_pool, IA_ApiConnect>
1215 LocalApiConnectRecord_api_list;
1216 typedef Ptr<ApiConnectRecord> ApiConnectRecordPtr;
1217
  /**
   * PrefetchApiConTimer
   *
   * Resolves the ApiConTimers entry of an api connect record using the
   * unchecked pool getters, so the pointer can be obtained (prefetched)
   * early without paying for validation up front.  The magic/consistency
   * validation is deferred to check_ptr(), which set_timer()/get_timer()
   * invoke lazily on first use.
   */
  class PrefetchApiConTimer
  {
  public:
    PrefetchApiConTimer(const ApiConTimers_pool& pool, ApiConnectRecordPtr apiConPtr, bool for_write)
    : m_apiConTimers(NULL), m_timer_index(0), checked(false)
    {
      m_apiConPtr = apiConPtr;
      const Uint32 apiConTimer = m_apiConPtr.p->m_apiConTimer;
      ApiConTimersPtr apiConTimers;
      require(apiConTimer != RNIL);
      // m_apiConTimer packs the ApiConTimers pool i-value (upper bits)
      // and the entry index within that record (lower INDEX_BITS bits).
      apiConTimers.i = apiConTimer >> ApiConTimers::INDEX_BITS;
      if (for_write)
      {
        require(pool.getUncheckedPtrRW(apiConTimers));
      }
      else
      {
        require(pool.getUncheckedPtrRO(apiConTimers));
      }
      m_timer_index = apiConTimer & ApiConTimers::INDEX_MASK;
      m_apiConTimers = apiConTimers.p;
    }

    // Validate the prefetched pointer (magic word) and the entry's
    // consistency with the owning api connect record.  Returns false
    // if the magic check fails.
    bool check_ptr()
    {
      if (unlikely(!Magic::check_ptr(m_apiConTimers)))
      {
        return false;
      }
      assert(m_timer_index < m_apiConTimers->m_top);
      assert(m_apiConTimers->m_count > 0);
      assert(m_apiConTimers->m_entries[m_timer_index].m_apiConnectRecord ==
             m_apiConPtr.i);
      checked = true;
      return true;
    }

    // Write the timer value; records 'line' on the api connect record
    // as a debugging aid showing who last touched the timer.
    void set_timer(Uint32 value, Uint32 line)
    {
      if (!checked)
      {
        require(check_ptr());
      }
      m_apiConTimers->m_entries[m_timer_index].m_timer = value;
      m_apiConPtr.p->m_apiConTimer_line = line;
    }

    // Read the timer value, validating the pointer on first access.
    Uint32 get_timer()
    {
      if (!checked)
      {
        require(check_ptr());
      }
      return m_apiConTimers->m_entries[m_timer_index].m_timer;
    }
  private:
    ApiConnectRecordPtr m_apiConPtr;   // owning api connect record
    ApiConTimers* m_apiConTimers;      // prefetched, validated by check_ptr()
    Uint32 m_timer_index;              // entry index within m_apiConTimers
    bool checked;                      // true once check_ptr() has passed
  };
1279
  // Set the timeout-supervision timer of an api connect record.
  // 'line' records the source line of the caller as a debugging aid.
  // NOTE(review): under DBTC_MAIN only the declaration is emitted here —
  // presumably the out-of-line definition lives in the main translation
  // unit; verify against DbtcMain.cpp.
  void setApiConTimer(ApiConnectRecordPtr apiConPtr, Uint32 value, Uint32 line)
#ifdef DBTC_MAIN
  ;
#else
  {
    const Uint32 apiConTimer = apiConPtr.p->m_apiConTimer;
    ApiConTimersPtr apiConTimers;
    ndbrequire(apiConTimer != RNIL);
    // m_apiConTimer packs the ApiConTimers pool i-value and the
    // entry index (see PrefetchApiConTimer).
    apiConTimers.i = apiConTimer >> ApiConTimers::INDEX_BITS;
    c_apiConTimersPool.getPtr(apiConTimers);
    const Uint32 timer_index = apiConTimer & ApiConTimers::INDEX_MASK;
    ndbassert(timer_index < apiConTimers.p->m_top);
    ndbassert(apiConTimers.p->m_count > 0);
    ndbassert(apiConTimers.p->m_entries[timer_index].m_apiConnectRecord ==
              apiConPtr.i);

    apiConTimers.p->m_entries[timer_index].m_timer = value;
    apiConPtr.p->m_apiConTimer_line = line;
  }
#endif
getApiConTimer(const ApiConnectRecordPtr apiConPtr) const1300 Uint32 getApiConTimer(const ApiConnectRecordPtr apiConPtr) const
1301 {
1302 const Uint32 apiConTimer = apiConPtr.p->m_apiConTimer;
1303 ApiConTimersPtr apiConTimers;
1304 ndbrequire(apiConTimer != RNIL);
1305 apiConTimers.i = apiConTimer >> ApiConTimers::INDEX_BITS;
1306 c_apiConTimersPool.getPtr(apiConTimers);
1307 const Uint32 timer_index = apiConTimer & ApiConTimers::INDEX_MASK;
1308 ndbassert(timer_index < apiConTimers.p->m_top);
1309 ndbassert(apiConTimers.p->m_count > 0);
1310 ndbassert(apiConTimers.p->m_entries[timer_index].m_apiConnectRecord ==
1311 apiConPtr.i);
1312
1313 return apiConTimers.p->m_entries[timer_index].m_timer;
1314 }
1315
  // Allocate a timer entry for an api connect record (which must not
  // already have one).  Entries are packed into ApiConTimers records;
  // a fresh ApiConTimers record is seized from the pool when there is
  // no current record or the current one is full.
  // Returns false if the pool is exhausted.
  bool seizeApiConTimer(ApiConnectRecordPtr apiConPtr)
  {
    ndbrequire(apiConPtr.p->m_apiConTimer == RNIL);
    if (unlikely(c_currentApiConTimers == NULL) ||
        (c_currentApiConTimers->m_top == ApiConTimers::INDEX_MAX_COUNT))
    {
      jam();
      ApiConTimersPtr apiConTimersptr;
      if (unlikely(!c_apiConTimersPool.seize(apiConTimersptr)))
      {
        jam();
        return false;
      }
      LocalApiConTimers_list timers_list(c_apiConTimersPool,
                                         c_apiConTimersList);
      timers_list.addLast(apiConTimersptr);
      c_currentApiConTimers = apiConTimersptr.p;
    }
    ApiConTimers* apiConTimers = c_currentApiConTimers;
    const Uint32 timer_index = apiConTimers->m_top;
    // Pack the pool i-value of the (last) ApiConTimers record together
    // with the entry index into a single reference word.
    const Uint32 apiConTimer =
      (c_apiConTimersList.getLast() << ApiConTimers::INDEX_BITS) | timer_index;
    apiConTimers->m_entries[timer_index].m_timer = 0;
    apiConTimers->m_entries[timer_index].m_apiConnectRecord = apiConPtr.i;
    apiConTimers->m_top++;
    apiConTimers->m_count++;
    apiConPtr.p->m_apiConTimer = apiConTimer;
    apiConPtr.p->m_apiConTimer_line = 0;
    return true;
  }
1346
  // Release the timer entry of an api connect record.  Clears the
  // entry, and when the owning ApiConTimers record becomes empty the
  // whole record is returned to the pool; c_currentApiConTimers is
  // repointed to the (new) last record in the list, or NULL if the
  // list is now empty.
  void releaseApiConTimer(ApiConnectRecordPtr apiConPtr)
  {
    const Uint32 apiConTimer = apiConPtr.p->m_apiConTimer;
    ndbrequire(apiConTimer != RNIL);

    ApiConTimersPtr apiConTimers;
    apiConTimers.i = apiConTimer >> ApiConTimers::INDEX_BITS;
    c_apiConTimersPool.getPtr(apiConTimers);
    const Uint32 timer_index = apiConTimer & ApiConTimers::INDEX_MASK;
    apiConTimers.p->m_entries[timer_index].m_timer = 0;
    apiConTimers.p->m_entries[timer_index].m_apiConnectRecord = RNIL;
    ndbassert(apiConTimers.p->m_count > 0);
    apiConTimers.p->m_count--;
    if (apiConTimers.p->m_count == 0)
    {
      jam();
      LocalApiConTimers_list timers_list(c_apiConTimersPool,
                                         c_apiConTimersList);
      timers_list.remove(apiConTimers);
      c_apiConTimersPool.release(apiConTimers);
      checkPoolShrinkNeed(DBTC_API_CONNECT_TIMERS_TRANSIENT_POOL_INDEX,
                          c_apiConTimersPool);
      if (apiConTimers.p == c_currentApiConTimers)
      {
        jam();
        // NOTE: apiConTimers.p was released above; only the pointer
        // value is compared/reassigned here, it is not dereferenced.
        if (timers_list.last(apiConTimers))
        {
          c_currentApiConTimers = apiConTimers.p;
        }
        else
        {
          c_currentApiConTimers = NULL;
        }
      }
    }
    apiConPtr.p->m_apiConTimer = RNIL;
  }
1384
1385 // ********************** CACHE RECORD **************************************
1386 //---------------------------------------------------------------------------
1387 // This record is used between reception of TCKEYREQ and sending of LQHKEYREQ
1388 // It is separated so as to improve the cache hit rate and also to minimise
1389 // the necessary memory storage in NDB Cluster.
1390 //---------------------------------------------------------------------------
1391
  // Per-operation scratch record, alive between reception of TCKEYREQ
  // and sending of LQHKEYREQ (see comment block above).
  struct CacheRecord
  {
    STATIC_CONST( TYPE_ID = RT_DBTC_CACHE_RECORD );

    CacheRecord()
    : m_magic(Magic::make(TYPE_ID))
    {}

    Uint32 m_magic;

    /* Fields used by TCKEYREQ/TCINDXREQ/SCANTABREQ */
    Uint32 keyInfoSectionI;  /* KeyInfo section I-val */
    Uint32 attrInfoSectionI; /* AttrInfo section I-val */

    // TODO : Consider using section length + other vars for this
    UintR  currReclenAi;     /* AttrInfo words received so far */
    Uint16 attrlength;       /* Total AttrInfo length */
    Uint16 save1;            /* KeyInfo words received so far */
    Uint16 keylen;           /* KEY LENGTH SENT BY REQUEST SIGNAL */

    /* Distribution information */
    // TODO : Consider placing this info into other records
    Uint8 distributionKeyIndicator; // non-zero when distributionKey is valid
    Uint8 viaSPJFlag;               /* Send request via the SPJ block.*/
    UintR distributionKey;
    /* End of fields used by TCKEYREQ/TCINDXREQ/SCANTABREQ */


    /* TCKEYREQ/TCINDXREQ only fields */
    UintR schemaVersion;/* SCHEMA VERSION USED IN TRANSACTION */
    UintR tableref;     /* POINTER TO THE TABLE IN WHICH THE FRAGMENT EXISTS*/

    UintR fragmentid;   /* THE COMPUTED FRAGMENT ID */
    UintR hashValue;    /* THE HASH VALUE USED TO LOCATE FRAGMENT */

    Uint8 m_special_hash; // collation or distribution key
    Uint8 m_no_hash;      // Hash not required for LQH (special variant)
    Uint8 m_no_disk_flag;
    Uint8 m_op_queue;
    Uint8 lenAiInTckeyreq;  /* LENGTH OF ATTRIBUTE INFORMATION IN TCKEYREQ */

    Uint8 fragmentDistributionKey;  /* DIH generation no */

    /**
     * EXECUTION MODE OF OPERATION
     * 0 = NORMAL EXECUTION, 1 = INTERPRETED EXECUTION
     */
    Uint8 opExec;
    Uint8 m_read_committed_base;
    Uint8 m_noWait;

    /* Use of Long signals */
    Uint8 isLongTcKeyReq;   /* Incoming TcKeyReq used long signal */
    Uint8 useLongLqhKeyReq; /* Outgoing LqhKeyReq should be long */

    Uint32 scanInfo;

    Uint32 scanTakeOverInd;
    Uint32 unlockNodeId;    /* NodeId for unlock operation */
    /* End of TCKEYREQ/TCINDXREQ only fields */

  };
1454
1455 typedef Ptr<CacheRecord> CacheRecordPtr;
1456 typedef TransientPool<CacheRecord> CacheRecord_pool;
1457 STATIC_CONST(DBTC_CACHE_RECORD_TRANSIENT_POOL_INDEX = 7);
1458 CacheRecord m_local_cache_record;
1459
1460 /* ************************ HOST RECORD ********************************** */
1461 /********************************************************/
1462 /* THIS RECORD CONTAINS ALIVE-STATUS ON ALL NODES IN THE*/
1463 /* SYSTEM */
1464 /********************************************************/
  // Alive-status and per-node bookkeeping for every node in the
  // system (see comment block above).
  struct HostRecord {
    // Packed-signal buffers towards the node's LQH instances
    // (one extra slot; presumably for the proxy/main instance — verify).
    struct PackedWordsContainer lqh_pack[MAX_NDBMT_LQH_THREADS+1];
    // Packed-signal buffer for TCKEYCONF towards this (API) node.
    struct PackedWordsContainer packTCKEYCONF;
    HostState hostStatus;
    LqhTransState lqhTransStatus;
    bool inPackedList;

    Uint32 m_location_domain_id;

    // Pending actions for this node during node-failure handling.
    enum NodeFailBits
    {
      NF_TAKEOVER          = 0x1,
      NF_CHECK_SCAN        = 0x2,
      NF_CHECK_TRANSACTION = 0x4,
      NF_BLOCK_HANDLE      = 0x8,
      NF_NODE_FAIL_BITS    = 0xF // All bits...
    };
    Uint32 m_nf_bits;
    NdbNodeBitmask m_lqh_trans_conf;
    /**
     * Indicator if any history to track yet
     *
     * Tracking scan and scan errors (API node)
     * Tracking read key, write key and index key operations
     * (API node and primary DB node)
     * Tracking scan frag and scan frag errors (API node)
     * Tracking transactions (API node)
     */
    Uint32 time_tracked;
    // Latency histograms, one bucket array per tracked operation type.
    Uint64 time_track_scan_histogram[TIME_TRACK_HISTOGRAM_RANGES];
    Uint64 time_track_scan_error_histogram[TIME_TRACK_HISTOGRAM_RANGES];
    Uint64 time_track_read_key_histogram[TIME_TRACK_HISTOGRAM_RANGES];
    Uint64 time_track_write_key_histogram[TIME_TRACK_HISTOGRAM_RANGES];
    Uint64 time_track_index_key_histogram[TIME_TRACK_HISTOGRAM_RANGES];
    Uint64 time_track_key_error_histogram[TIME_TRACK_HISTOGRAM_RANGES];
    Uint64 time_track_scan_frag_histogram[TIME_TRACK_HISTOGRAM_RANGES];
    Uint64 time_track_scan_frag_error_histogram[TIME_TRACK_HISTOGRAM_RANGES];
    Uint64 time_track_transaction_histogram[TIME_TRACK_HISTOGRAM_RANGES];
    Uint64 time_track_transaction_error_histogram[TIME_TRACK_HISTOGRAM_RANGES];
  };
1505 Uint32 m_my_location_domain_id;
1506
1507 typedef Ptr<HostRecord> HostRecordPtr;
1508
1509 /* *********** TABLE RECORD ********************************************* */
1510
1511 /********************************************************/
1512 /* THIS RECORD CONTAINS THE CURRENT SCHEMA VERSION OF */
1513 /* ALL TABLES IN THE SYSTEM. */
1514 /********************************************************/
1515 struct TableRecord {
TableRecordDbtc::TableRecord1516 TableRecord() {}
1517 Uint32 currentSchemaVersion;
1518 Uint16 m_flags;
1519 Uint8 tableType;
1520 Uint8 singleUserMode;
1521
1522 enum {
1523 TR_ENABLED = 1 << 0,
1524 TR_DROPPING = 1 << 1,
1525 TR_STORED_TABLE = 1 << 2,
1526 TR_PREPARED = 1 << 3
1527 ,TR_USER_DEFINED_PARTITIONING = 1 << 4
1528 ,TR_READ_BACKUP = (1 << 5)
1529 ,TR_FULLY_REPLICATED = (1<<6)
1530 ,TR_DELAY_COMMIT = (1 << 7)
1531 };
get_enabledDbtc::TableRecord1532 Uint8 get_enabled() const { return (m_flags & TR_ENABLED) != 0; }
get_droppingDbtc::TableRecord1533 Uint8 get_dropping() const { return (m_flags & TR_DROPPING) != 0; }
get_storedTableDbtc::TableRecord1534 Uint8 get_storedTable() const { return (m_flags & TR_STORED_TABLE) != 0; }
get_preparedDbtc::TableRecord1535 Uint8 get_prepared() const { return (m_flags & TR_PREPARED) != 0; }
set_enabledDbtc::TableRecord1536 void set_enabled(Uint8 f) { f ? m_flags |= (Uint16)TR_ENABLED : m_flags &= ~(Uint16)TR_ENABLED; }
set_droppingDbtc::TableRecord1537 void set_dropping(Uint8 f) { f ? m_flags |= (Uint16)TR_DROPPING : m_flags &= ~(Uint16)TR_DROPPING; }
set_storedTableDbtc::TableRecord1538 void set_storedTable(Uint8 f) { f ? m_flags |= (Uint16)TR_STORED_TABLE : m_flags &= ~(Uint16)TR_STORED_TABLE; }
set_preparedDbtc::TableRecord1539 void set_prepared(Uint8 f) { f ? m_flags |= (Uint16)TR_PREPARED : m_flags &= ~(Uint16)TR_PREPARED; }
1540
get_user_defined_partitioningDbtc::TableRecord1541 Uint8 get_user_defined_partitioning() const {
1542 return (m_flags & TR_USER_DEFINED_PARTITIONING) != 0;
1543 }
1544
set_user_defined_partitioningDbtc::TableRecord1545 void set_user_defined_partitioning(Uint8 f) {
1546 f ?
1547 m_flags |= (Uint16)TR_USER_DEFINED_PARTITIONING :
1548 m_flags &= ~(Uint16)TR_USER_DEFINED_PARTITIONING;
1549 }
1550
1551 Uint8 noOfKeyAttr;
1552 Uint8 hasCharAttr;
1553 Uint8 noOfDistrKeys;
1554 Uint8 hasVarKeys;
1555
checkTableDbtc::TableRecord1556 bool checkTable(Uint32 schemaVersion) const {
1557 return !get_dropping() &&
1558 ((/** normal transaction path */
1559 get_enabled() &&
1560 table_version_major(schemaVersion) ==
1561 table_version_major(currentSchemaVersion))
1562 ||
1563 (/**
1564 * unique index is relaxed for DbUtil and transactions ongoing
1565 * while index is created
1566 */
1567 get_prepared() && schemaVersion == currentSchemaVersion &&
1568 DictTabInfo::isUniqueIndex(tableType)));
1569 }
1570
1571 Uint32 getErrorCode(Uint32 schemaVersion) const;
1572 };
1573 typedef Ptr<TableRecord> TableRecordPtr;
1574
1575 /**
1576 * Specify the location of a fragment.
1577 * The primaryBlockRef is the location of the primary partition.
1578 * The preferredBlockRef is the preferred location using READ
1579 * BACKUP and/or location domains. The block reference is always
1580 * pointing to a LQH where the data resides.
1581 *
1582 * primaryBlockRef is only used to sort out which SCAN_FRAGREQ to
1583 * SPJ the fragment should be sent to. When using MultiFragFlag
1584 * (currently only used by DBSPJ) we will divide the query into
1585 * a set of SPJ workers, each handling a subset of the root
1586 * table. The primaryBlockRef is used to decide which SPJ worker
1587 * should handle this fragment. The preferredBlockRef decides
1588 * the placement of the SPJ worker still, this means that we can
1589 * have multiple SPJ workers on the same node.
1590 */
  struct ScanFragLocation
  {
    Uint32 primaryBlockRef;   // block ref of the primary partition (SPJ worker routing)
    Uint32 preferredBlockRef; // preferred replica (READ BACKUP / location domains)
    Uint32 fragId;
  };
1597
1598 #define NUM_FRAG_LOCATIONS_IN_ARRAY 32
1599
  // Pool record holding a batch of up to NUM_FRAG_LOCATIONS_IN_ARRAY
  // fragment locations; records are chained into a list per scan.
  struct ScanFragLocationRec
  {
    STATIC_CONST( TYPE_ID = RT_DBTC_FRAG_LOCATION );

    ScanFragLocationRec()
    : m_magic(Magic::make(TYPE_ID)),
      nextList(RNIL)
    {}

    Uint32 m_magic;

    /**
     * Next ptr (used in pool/list)
     */
    Uint32 nextList;

    // Window of valid entries in the array: [m_first_index, m_next_index)
    // — presumably; verify against producer/consumer code.
    Uint32 m_first_index;
    Uint32 m_next_index;
    struct ScanFragLocation m_frag_location_array[NUM_FRAG_LOCATIONS_IN_ARRAY];
  };
1620
1621 typedef Ptr<ScanFragLocationRec> ScanFragLocationPtr;
1622 typedef TransientPool<ScanFragLocationRec> ScanFragLocation_pool;
1623 STATIC_CONST(DBTC_FRAG_LOCATION_TRANSIENT_POOL_INDEX = 8);
1624 typedef SLFifoList<ScanFragLocation_pool> ScanFragLocation_list;
1625 typedef LocalSLFifoList<ScanFragLocation_pool> Local_ScanFragLocation_list;
1626
1627 alignas(64) ScanFragLocation_pool m_fragLocationPool;
1628 RSS_AP_SNAPSHOT(m_fragLocationPool);
1629
1630 /**
1631 * There is max 16 ScanFragRec's for
1632 * each scan started in TC. Each ScanFragRec is used by
1633 * a scan fragment "process" that scans one fragment at a time.
1634 * It will receive max 16 tuples in each request
1635 */
  // One fragment-scan "process"; see the comment block above for the
  // per-scan limit and batching behaviour.
  struct ScanFragRec {
    STATIC_CONST( TYPE_ID = RT_DBTC_SCAN_FRAGMENT );
    ScanFragRec();
    /**
     * ScanFragState
     * WAIT_GET_PRIMCONF : Waiting for DIGETPRIMCONF when starting a new
     * fragment scan (Obsolete; Checked for, but never set)
     * LQH_ACTIVE : The scan process has sent a command to LQH and is
     * waiting for the response
     * LQH_ACTIVE_CLOSE : The scan process has sent close to LQH and is
     * waiting for the response (Unused)
     * DELIVERED : The result have been delivered, this scan frag process
     * are waiting for a SCAN_NEXTREQ to tell us to continue scanning
     * RETURNING_FROM_DELIVERY : SCAN_NEXTREQ received and continuing scan
     * soon (Unused)
     * QUEUED_FOR_DELIVERY : Result queued in TC and waiting for delivery
     * to API
     * COMPLETED : The fragment scan processes has completed and finally
     * sent a SCAN_PROCCONF
     */
    enum ScanFragState {
      IDLE = 0,
      WAIT_GET_PRIMCONF = 1,
      LQH_ACTIVE = 2,
      // Gaps at 3 and 5 correspond to the unused LQH_ACTIVE_CLOSE and
      // RETURNING_FROM_DELIVERY states described above.
      DELIVERED = 4,
      QUEUED_FOR_DELIVERY = 6,
      COMPLETED = 7
    };
    Uint32 m_magic;
    // Timer for checking timeout of this fragment scan
    Uint32 scanFragTimer;

    // Fragment id as reported back by DIGETNODESREQ
    Uint32 lqhScanFragId;

    // Blockreference of LQH
    BlockReference lqhBlockref;

    // getNodeInfo.m_connectCount, set at seize used so that
    // I don't accidently kill a starting node
    Uint32 m_connectCount;

    // State of this fragment scan
    ScanFragState scanFragState;

    // Id of the ScanRecord this fragment scan belongs to
    Uint32 scanRec;

    // The value of fragmentCompleted in the last received SCAN_FRAGCONF
    Uint8 m_scan_frag_conf_status;

    // Arm the timeout supervision timer with the given time value.
    inline void startFragTimer(Uint32 timeVal){
      scanFragTimer = timeVal;
    }
    // Disarm timeout supervision (0 == not running).
    inline void stopFragTimer(void){
      scanFragTimer = 0;
    }

    Uint32 m_ops;       // operations (rows) in the current batch — presumably; verify
    Uint32 m_apiPtr;
    Uint32 m_totalLen;
    Uint32 m_hasMore;
    Uint32 nextList;    // list links (used by the DLList typedefs below)
    Uint32 prevList;
    NDB_TICKS m_start_ticks;
  };
1702
1703 typedef Ptr<ScanFragRec> ScanFragRecPtr;
1704 typedef TransientPool<ScanFragRec> ScanFragRec_pool;
1705 STATIC_CONST(DBTC_SCAN_FRAGMENT_TRANSIENT_POOL_INDEX = 9);
1706 typedef SLList<ScanFragRec_pool> ScanFragRec_sllist;
1707 typedef DLList<ScanFragRec_pool> ScanFragRec_dllist;
1708 typedef LocalDLList<ScanFragRec_pool> Local_ScanFragRec_dllist;
1709
1710 /**
1711 * Each scan allocates one ScanRecord to store information
1712 * about the current scan
1713 *
1714 */
  // Per-scan state record; one allocated per SCAN_TABREQ.
  struct ScanRecord {
    STATIC_CONST( TYPE_ID = RT_DBTC_SCAN_RECORD );
    ScanRecord()
    : m_magic(Magic::make(TYPE_ID)),
      scanState(IDLE),
      scanApiRec(RNIL)
    {
      NdbTick_Invalidate(&m_start_ticks);
    }
    /** NOTE! This is the old comment for ScanState. - MASV
     * STATE TRANSITIONS OF SCAN_STATE. SCAN_STATE IS THE STATE
     * VARIABLE OF THE RECEIVE AND DELIVERY PROCESS.
     * THE PROCESS HAS THREE STEPS IT GOES THROUGH.
     * 1) THE INITIAL STATES WHEN RECEIVING DATA FOR THE SCAN.
     *    - WAIT_SCAN_TAB_INFO
     *    - WAIT_AI
     *    - WAIT_FRAGMENT_COUNT
     * 2) THE EXECUTION STATES WHEN THE SCAN IS PERFORMED.
     *    - SCAN_NEXT_ORDERED
     *    - DELIVERED
     *    - QUEUED_DELIVERED
     * 3) THE CLOSING STATE WHEN THE SCAN PROCESS IS CLOSING UP
     * EVERYTHING.
     *    - CLOSING_SCAN
     * INITIAL START WHEN SCAN_TABREQ RECEIVED
     * -> WAIT_SCAN_TAB_INFO (IF ANY SCAN_TABINFO TO BE RECEIVED)
     * -> WAIT_AI (IF NO SCAN_TAB_INFO BUT ATTRINFO IS RECEIVED)
     * -> WAIT_FRAGMENT_COUNT (IF NEITHER SCAN_TABINFO OR ATTRINFO
     * RECEIVED)
     *
     * WAIT_SCAN_TAB_INFO TRANSITIONS:
     * -> WAIT_SCAN_TABINFO (WHEN MORE SCAN_TABINFO RECEIVED)
     * -> WAIT_AI (WHEN ATTRINFO RECEIVED AFTER RECEIVING ALL
     * SCAN_TABINFO)
     * -> WAIT_FRAGMENT_COUNT (WHEN NO ATTRINFO RECEIVED AFTER
     * RECEIVING ALL SCAN_TABINFO )
     * WAIT_AI TRANSITIONS:
     * -> WAIT_AI (WHEN MORE ATTRINFO RECEIVED)
     * -> WAIT_FRAGMENT_COUNT (WHEN ALL ATTRINFO RECEIVED)
     *
     * WAIT_FRAGMENT_COUNT TRANSITIONS:
     * -> SCAN_NEXT_ORDERED
     *
     * SCAN_NEXT_ORDERED TRANSITIONS:
     * -> DELIVERED (WHEN FIRST SCAN_FRAGCONF ARRIVES WITH OPERATIONS
     * TO REPORT IN IT)
     * -> CLOSING_SCAN (WHEN SCAN IS CLOSED BY SCAN_NEXTREQ OR BY SOME
     * ERROR)
     *
     * DELIVERED TRANSITIONS:
     * -> SCAN_NEXT_ORDERED (IF SCAN_NEXTREQ ARRIVES BEFORE ANY NEW
     * OPERATIONS TO REPORT ARRIVES)
     * -> QUEUED_DELIVERED (IF NEW OPERATION TO REPORT ARRIVES BEFORE
     * SCAN_NEXTREQ)
     * -> CLOSING_SCAN (WHEN SCAN IS CLOSED BY SCAN_NEXTREQ OR BY SOME
     * ERROR)
     *
     * QUEUED_DELIVERED TRANSITIONS:
     * -> DELIVERED (WHEN SCAN_NEXTREQ ARRIVES AND QUEUED OPERATIONS
     * TO REPORT ARE SENT TO THE APPLICATION)
     * -> CLOSING_SCAN (WHEN SCAN IS CLOSED BY SCAN_NEXTREQ OR BY
     * SOME ERROR)
     */
    enum ScanState {
      IDLE = 0,
      WAIT_SCAN_TAB_INFO = 1,
      WAIT_AI = 2,
      WAIT_FRAGMENT_COUNT = 3,
      RUNNING = 4,
      CLOSING_SCAN = 5
    };

    // State of this scan
    Uint32 m_magic;
    ScanState scanState;
    // Section i-values holding the scan's KeyInfo/AttrInfo — presumably;
    // verify against SCAN_TABREQ handling.
    Uint32 scanKeyInfoPtr;
    Uint32 scanAttrInfoPtr;

    // List of fragment locations as reported by DIH
    ScanFragLocation_list::Head m_fragLocations;

    ScanFragRec_dllist::Head m_running_scan_frags;  // Currently in LQH
    union { Uint32 m_queued_count; Uint32 scanReceivedOperations; };
    ScanFragRec_dllist::Head m_queued_scan_frags;   // In TC !sent to API
    ScanFragRec_dllist::Head m_delivered_scan_frags;// Delivered to API

    // Id of the next fragment to be scanned. Used by scan fragment
    // processes when they are ready for the next fragment
    Uint32 scanNextFragId;

    // Total number of fragments in the table we are scanning
    Uint32 scanNoFrag;

    // NOTE(review): previous comment here said "Length of expected
    // attribute information", which looks stale for this member;
    // presumably the number of fragments booked (reserved) for this
    // scan — verify against usage.
    Uint32 m_booked_fragments_count;

    // Reference to ApiConnectRecord
    Uint32 scanApiRec;

    // Number of scan frag processes that belong to this scan
    Uint32 scanParallel;

    // Schema version used by this scan
    Uint32 scanSchemaVersion;

    // Index of stored procedure belonging to this scan
    Uint32 scanStoredProcId;

    // The index of table that is scanned
    Uint32 scanTableref;
    Uint32 m_scan_cookie;

    // Number of operation records per scanned fragment
    // Number of operations in first batch
    // Max number of bytes per batch
    union {
      Uint16 first_batch_size_rows;
      Uint16 batch_size_rows;
    };
    Uint32 batch_byte_size;
    Uint32 m_scan_block_no;

    Uint32 scanRequestInfo; // ScanFrag format

    // Close is ordered
    bool m_close_scan_req;
    // All SCAN_FRAGCONS should be passed on to the API as SCAN_TABCONFS.
    // This is needed to correctly propagate 'node masks' when scanning via the
    // SPJ block.
    bool m_pass_all_confs;

    /**
     * Use 4 or 5 word extended conf signal, where opcount, total_len & active
     * are sent as seperate words. 4 or 5 word extended format is decided
     * based on 'ndbd_send_active_bitmask(<version>)'
     */
    bool m_extended_conf;
    bool m_read_committed_base;

    /**
     * Distribution-key scan pruning: when m_scan_dist_key_flag is set,
     * m_scan_dist_key holds the key — presumably; verify against
     * SCAN_TABREQ handling.
     */
    bool m_scan_dist_key_flag;
    Uint32 m_scan_dist_key;
    Uint32 m_read_any_node;
    NDB_TICKS m_start_ticks;
  };
1862 typedef Ptr<ScanRecord> ScanRecordPtr;
1863 typedef TransientPool<ScanRecord> ScanRecord_pool;
1864 STATIC_CONST(DBTC_SCAN_RECORD_TRANSIENT_POOL_INDEX = 10);
1865
1866 /*************************************************************************>*/
1867 /* GLOBAL CHECKPOINT INFORMATION RECORD */
1868 /* */
1869 /* THIS RECORD IS USED TO STORE THE GLOBALCHECKPOINT NUMBER AND A
1870 * COUNTER DURING THE COMPLETION PHASE OF THE TRANSACTION */
1871 /*************************************************************************>*/
1872 /* */
1873 /* GCP RECORD ALIGNED TO BE 32 BYTES */
1874 /*************************************************************************>*/
  // Global checkpoint record: tracks one GCP id and the transactions
  // completing within it (see comment block above).
  struct GcpRecord
  {
    STATIC_CONST( TYPE_ID = RT_DBTC_GCP_RECORD );

    GcpRecord()
    : m_magic(Magic::make(TYPE_ID))
    {}

    Uint32 m_magic;
    // Presumably set when GCP_NOMORETRANS has been received for this
    // GCP and a reply is outstanding — verify against usage.
    Uint16 gcpNomoretransRec;
    // Transactions belonging to this global checkpoint.
    LocalApiConnectRecord_gcp_list::Head apiConnectList;
    UintR nextList;
    Uint64 gcpId;
  }; /* p2c: size = 32 bytes */
1889
1890 typedef Ptr<GcpRecord> GcpRecordPtr;
1891 typedef TransientPool<GcpRecord> GcpRecord_pool;
1892 STATIC_CONST(DBTC_GCP_RECORD_TRANSIENT_POOL_INDEX = 11);
1893 typedef LocalSLFifoList<GcpRecord_pool> LocalGcpRecord_list;
1894
1895 /*************************************************************************>*/
1896 /* TC_FAIL_RECORD */
1897 /* THIS RECORD IS USED WHEN HANDLING TAKE OVER OF ANOTHER FAILED
1898 * TC NODE. */
1899 /*************************************************************************>*/
  // State for taking over transactions of a failed TC node
  // (see comment block above).
  struct TcFailRecord {
    Uint16 queueList[MAX_NDB_NODES];            // queue of nodes awaiting take over
    Uint8 takeOverProcState[MAX_NDB_NODES];     // per-node take-over progress
    UintR completedTakeOver;
    UintR currentHashIndexTakeOver;
    Uint32 maxInstanceId;
    bool takeOverFailed;
    bool handledOneTransaction;
    Uint32 takeOverInstanceId;
    FailState failStatus;
    Uint16 queueIndex;
    Uint16 takeOverNode;                        // node currently being taken over
  };
1913 typedef Ptr<TcFailRecord> TcFailRecordPtr;
1914
1915 public:
1916 Dbtc(Block_context&, Uint32 instanceNumber = 0);
1917 virtual ~Dbtc();
1918
1919 private:
1920 BLOCK_DEFINES(Dbtc);
1921
1922 // Transit signals
1923 void execPACKED_SIGNAL(Signal* signal);
1924 void execABORTED(Signal* signal);
1925 void execATTRINFO(Signal* signal);
1926 void execCONTINUEB(Signal* signal);
1927 void execKEYINFO(Signal* signal);
1928 void execSCAN_NEXTREQ(Signal* signal);
1929 void execSCAN_PROCREQ(Signal* signal);
1930 void execSCAN_PROCCONF(Signal* signal);
1931 void execTAKE_OVERTCCONF(Signal* signal);
1932 void execLQHKEYREF(Signal* signal);
1933 void execTRANSID_AI_R(Signal* signal);
1934 void execKEYINFO20_R(Signal* signal);
1935 void execROUTE_ORD(Signal* signal);
1936 // Received signals
1937 void execDUMP_STATE_ORD(Signal* signal);
1938 void execDBINFO_SCANREQ(Signal* signal);
1939 void execSEND_PACKED(Signal* signal);
1940 void execCOMPLETED(Signal* signal);
1941 void execCOMMITTED(Signal* signal);
1942 void execDIGETNODESREF(Signal* signal, ApiConnectRecordPtr apiConnectptr);
1943 void execDIVERIFYCONF(Signal* signal);
1944 void execDIH_SCAN_TAB_REF(Signal* signal,
1945 ScanRecordPtr scanptr,
1946 ApiConnectRecordPtr apiConnectptr);
1947 void execDIH_SCAN_TAB_CONF(Signal* signal,
1948 ScanRecordPtr scanptr,
1949 TableRecordPtr tabPtr,
1950 ApiConnectRecordPtr apiConnectptr);
1951 void execGCP_NOMORETRANS(Signal* signal);
1952 void execLQHKEYCONF(Signal* signal);
1953 void execNDB_STTOR(Signal* signal);
1954 void execREAD_NODESCONF(Signal* signal);
1955 void execREAD_NODESREF(Signal* signal);
1956 void execSTTOR(Signal* signal);
1957 void execTC_COMMITREQ(Signal* signal);
1958 void execTC_CLOPSIZEREQ(Signal* signal);
1959 void execTCGETOPSIZEREQ(Signal* signal);
1960 void execTCKEYREQ(Signal* signal);
1961 void execTCRELEASEREQ(Signal* signal);
1962 void execTCSEIZEREQ(Signal* signal);
1963 void execTCROLLBACKREQ(Signal* signal);
1964 void execTC_HBREP(Signal* signal);
1965 void execTC_SCHVERREQ(Signal* signal);
1966 void execTAB_COMMITREQ(Signal* signal);
1967 void execSCAN_TABREQ(Signal* signal);
1968 void execSCAN_TABINFO(Signal* signal);
1969 void execSCAN_FRAGCONF(Signal* signal);
1970 void execSCAN_FRAGREF(Signal* signal);
1971 void execREAD_CONFIG_REQ(Signal* signal);
1972 void execLQH_TRANSCONF(Signal* signal);
1973 void execCOMPLETECONF(Signal* signal);
1974 void execCOMMITCONF(Signal* signal);
1975 void execABORTCONF(Signal* signal);
1976 void execNODE_FAILREP(Signal* signal);
1977 void execINCL_NODEREQ(Signal* signal);
1978 void execTIME_SIGNAL(Signal* signal);
1979 void execAPI_FAILREQ(Signal* signal);
1980 void execSCAN_HBREP(Signal* signal);
1981
1982 void execABORT_ALL_REQ(Signal* signal);
1983
1984 void execCREATE_TRIG_IMPL_REQ(Signal* signal);
1985 void execDROP_TRIG_IMPL_REQ(Signal* signal);
1986 void execFIRE_TRIG_ORD(Signal* signal);
1987 void execTRIG_ATTRINFO(Signal* signal);
1988 void execCREATE_INDX_IMPL_REQ(Signal* signal);
1989 void execDROP_INDX_IMPL_REQ(Signal* signal);
1990 void execTCINDXREQ(Signal* signal);
1991 void execINDXKEYINFO(Signal* signal);
1992 void execINDXATTRINFO(Signal* signal);
1993 void execALTER_INDX_IMPL_REQ(Signal* signal);
1994 void execSIGNAL_DROPPED_REP(Signal* signal);
1995
1996 void execFIRE_TRIG_REF(Signal*);
1997 void execFIRE_TRIG_CONF(Signal*);
1998
1999 void execCREATE_FK_IMPL_REQ(Signal* signal);
2000 void execDROP_FK_IMPL_REQ(Signal* signal);
2001
2002 // Index table lookup
2003 void execTCKEYCONF(Signal* signal);
2004 void execTCKEYREF(Signal* signal);
2005 void execTRANSID_AI(Signal* signal);
2006 void execTCROLLBACKREP(Signal* signal);
2007
2008 void execCREATE_TAB_REQ(Signal* signal);
2009 void execPREP_DROP_TAB_REQ(Signal* signal);
2010 void execDROP_TAB_REQ(Signal* signal);
2011 void checkWaitDropTabFailedLqh(Signal*, NodeId nodeId, Uint32 tableId);
2012 void execALTER_TAB_REQ(Signal* signal);
2013 void set_timeout_value(Uint32 timeOut);
2014 void set_appl_timeout_value(Uint32 timeOut);
2015 void set_no_parallel_takeover(Uint32);
2016 void updateBuddyTimer(ApiConnectRecordPtr);
2017
2018 // Statement blocks
2019 void updatePackedList(Signal* signal, HostRecord* ahostptr,
2020 Uint16 ahostIndex);
2021 void clearTcNodeData(Signal* signal,
2022 UintR TLastLqhIndicator,
2023 UintR Tstart);
2024 void errorReport(Signal* signal, int place);
2025 void warningReport(Signal* signal, int place);
2026 void printState(Signal* signal, int place, ApiConnectRecordPtr apiConnectptr, bool force_trace = false);
2027 int seizeTcRecord(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2028 int seizeCacheRecord(Signal* signal, CacheRecordPtr& cachePtr, ApiConnectRecord* regApiPtr);
2029 void releaseCacheRecord(ApiConnectRecordPtr transPtr, CacheRecord*);
2030 void TCKEY_abort(Signal* signal, int place, ApiConnectRecordPtr apiConnectptr);
2031 void copyFromToLen(UintR* sourceBuffer, UintR* destBuffer, UintR copyLen);
2032 void reportNodeFailed(Signal* signal, NodeId nodeId);
2033 void sendPackedTCKEYCONF(Signal* signal,
2034 HostRecord * ahostptr,
2035 UintR hostId);
2036 void sendPackedSignal(Signal* signal,
2037 struct PackedWordsContainer * container);
2038 Uint32 sendCommitLqh(Signal* signal,
2039 TcConnectRecord * regTcPtr,
2040 ApiConnectRecord* regApiPtr);
2041 Uint32 sendCompleteLqh(Signal* signal,
2042 TcConnectRecord * const regTcPtr,
2043 ApiConnectRecord* const regApiPtr);
2044
2045 void startSendFireTrigReq(Signal*, Ptr<ApiConnectRecord>);
2046
2047 void sendFireTrigReq(Signal*, Ptr<ApiConnectRecord>);
2048
2049 Uint32 sendFireTrigReqLqh(Signal*, Ptr<TcConnectRecord>, Uint32 pass, ApiConnectRecord* regApiPtr);
2050
2051 void checkWaitFireTrigConfDone(Signal*,
2052 Ptr<ApiConnectRecord>);
2053
  /**
   * These hash tables are indexed by masking (i.e. modulo a power of two),
   * so the sizes below must be powers of two (2^n).
   */
2057 #define TC_FAIL_HASH_SIZE 4096
2058 #define TRANSID_FAIL_HASH_SIZE 1024
2059
2060 void sendTCKEY_FAILREF(Signal* signal, ApiConnectRecord *);
2061 void sendTCKEY_FAILCONF(Signal* signal, ApiConnectRecord *);
2062 void routeTCKEY_FAILREFCONF(Signal* signal, const ApiConnectRecord *,
2063 Uint32 gsn, Uint32 len);
2064 void execTCKEY_FAILREFCONF_R(Signal* signal);
2065 void timer_handling(Signal *signal);
2066 void checkStartTimeout(Signal* signal);
2067 void checkStartFragTimeout(Signal* signal);
2068 void timeOutFoundFragLab(Signal* signal, Uint32 TscanConPtr);
2069 void timeOutLoopStartFragLab(Signal* signal, Uint32 TscanConPtr);
2070 int releaseAndAbort(Signal* signal, ApiConnectRecord* regApiPtr);
2071
2072 void scan_for_read_backup(Signal *, Uint32, Uint32, Uint32);
2073 void releaseMarker(ApiConnectRecord * const regApiPtr);
2074
2075 Uint32 get_transid_fail_bucket(Uint32 transid1);
2076 void insert_transid_fail_hash(Uint32 transid1, ApiConnectRecordPtr apiConnectptr);
2077 void remove_from_transid_fail_hash(Signal *signal, Uint32 transid1, ApiConnectRecordPtr apiConnectptr);
2078 Uint32 get_tc_fail_bucket(Uint32 transid1, Uint32 tcOprec);
2079 void insert_tc_fail_hash(Uint32 transid1, Uint32 tcOprec);
2080 void remove_transaction_from_tc_fail_hash(Signal *signal, ApiConnectRecord* regApiPtr);
2081
2082 void initTcFail(Signal* signal);
2083 void releaseTakeOver(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2084 void setupFailData(Signal* signal, ApiConnectRecord* regApiPtr);
2085 bool findApiConnectFail(Signal* signal, Uint32 transid1, Uint32 transid2, ApiConnectRecordPtr& apiConnectptr);
2086 void initApiConnectFail(Signal* signal,
2087 Uint32 transid1,
2088 Uint32 transid2,
2089 LqhTransConf::OperationStatus transStatus,
2090 Uint32 reqinfo,
2091 BlockReference applRef,
2092 Uint64 gci,
2093 NodeId nodeId,
2094 ApiConnectRecordPtr apiConnectptr);
2095 void updateApiStateFail(Signal* signal,
2096 Uint32 transid1,
2097 Uint32 transid2,
2098 LqhTransConf::OperationStatus transStatus,
2099 Uint32 reqinfo,
2100 BlockReference applRef,
2101 Uint64 gci,
2102 NodeId nodeId,
2103 ApiConnectRecordPtr apiConnectptr);
2104 bool findTcConnectFail(Signal* signal,
2105 Uint32 transid1,
2106 Uint32 transid2,
2107 Uint32 tcOprec,
2108 ApiConnectRecordPtr apiConnectptr);
2109 void initTcConnectFail(Signal* signal,
2110 Uint32 instanceKey,
2111 Uint32 tcOprec,
2112 Uint32 reqinfo,
2113 LqhTransConf::OperationStatus transStatus,
2114 NodeId nodeId,
2115 Uint32 apiConnectPtr);
2116 void updateTcStateFail(Signal* signal,
2117 Uint32 instanceKey,
2118 Uint32 tcOprec,
2119 Uint32 reqinfo,
2120 LqhTransConf::OperationStatus transStatus,
2121 NodeId nodeId,
2122 ApiConnectRecordPtr apiConnecptr);
2123
2124 bool handleFailedApiConnection(Signal*,
2125 Uint32 *TloopCount,
2126 Uint32 TapiFailedNode,
2127 bool apiNodeFailed,
2128 ApiConnectRecordPtr apiConnectptr);
2129 void set_api_fail_state(Uint32 TapiFailedNode, bool apiNodeFailed, ApiConnectRecord* regApiPtr);
2130 void handleApiFailState(Signal* signal, UintR anApiConnectptr);
2131 void handleFailedApiNode(Signal* signal,
2132 UintR aFailedNode,
2133 UintR anApiConnectPtr);
2134 void handleScanStop(Signal* signal, UintR aFailedNode);
2135 void initScanTcrec(Signal* signal);
2136 Uint32 initScanrec(ScanRecordPtr, const class ScanTabReq*,
2137 const UintR scanParallel,
2138 const Uint32 apiPtr[],
2139 Uint32 apiConnectPtr);
2140 void initScanfragrec(Signal* signal);
2141 void releaseScanResources(Signal*,
2142 ScanRecordPtr,
2143 ApiConnectRecordPtr apiConnectptr,
2144 bool not_started = false);
2145 ScanRecordPtr seizeScanrec(Signal* signal);
2146
2147 void sendDihGetNodesLab(Signal*, ScanRecordPtr, ApiConnectRecordPtr);
2148 bool sendDihGetNodeReq(Signal*,
2149 ScanRecordPtr,
2150 ScanFragLocationPtr &fragLocationPtr,
2151 Uint32 scanFragId,
2152 bool is_multi_spj_scan);
2153 void get_next_frag_location(ScanFragLocationPtr fragLocationPtr,
2154 Uint32 & fragId,
2155 Uint32 & primaryBlockRef,
2156 Uint32 & preferredBlockRef);
2157 void get_and_step_next_frag_location(ScanFragLocationPtr & fragLocationPtr,
2158 ScanRecord *scanPtrP,
2159 Uint32 & fragId,
2160 Uint32 & primaryBlockRef,
2161 Uint32 & preferredBlockRef);
2162 void sendFragScansLab(Signal*, ScanRecordPtr, ApiConnectRecordPtr);
2163 bool sendScanFragReq(Signal*,
2164 ScanRecordPtr,
2165 ScanFragRecPtr,
2166 ScanFragLocationPtr & fragLocationPtr,
2167 ApiConnectRecordPtr const apiConnectptr);
2168 void sendScanTabConf(Signal* signal, ScanRecordPtr, ApiConnectRecordPtr);
2169 void close_scan_req(Signal*, ScanRecordPtr, bool received_req, ApiConnectRecordPtr apiConnectptr);
2170 void close_scan_req_send_conf(Signal*, ScanRecordPtr, ApiConnectRecordPtr apiConnectptr);
2171
2172 void checkGcp(Signal* signal);
2173 void commitGciHandling(Signal* signal, Uint64 Tgci, ApiConnectRecordPtr apiConnectptr);
2174 void copyApi(Ptr<ApiConnectRecord> dst, Ptr<ApiConnectRecord> src);
2175 void DIVER_node_fail_handling(Signal* signal, Uint64 Tgci, ApiConnectRecordPtr apiConnectptr);
2176 void gcpTcfinished(Signal* signal, Uint64 gci);
2177 void handleGcp(Signal* signal, ApiConnectRecordPtr);
2178 void hash(Signal* signal, CacheRecord * regCachePtr);
2179 bool handle_special_hash(Uint32 dstHash[4],
2180 const Uint32* src, Uint32 srcLen,
2181 Uint32 tabPtrI, bool distr);
2182
2183 void initApiConnect(Signal* signal);
2184 void initApiConnectRec(Signal* signal,
2185 ApiConnectRecord * const regApiPtr,
2186 bool releaseIndexOperations = false);
2187 void inithost(Signal* signal);
2188 void initialiseScanrec(Signal* signal);
2189 void initTable(Signal* signal);
2190 void initialiseTcConnect(Signal* signal);
2191 void linkApiToGcp(Ptr<GcpRecord>, Ptr<ApiConnectRecord>);
2192 void linkGciInGcilist(Ptr<GcpRecord>);
2193 void linkTcInConnectionlist(Signal* signal, ApiConnectRecord* regApiPtr);
2194 void releaseAbortResources(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2195 void releaseApiCon(Signal* signal, UintR aApiConnectPtr);
2196 void releaseApiConCopy(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2197 void releaseApiConnectFail(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2198 void releaseAttrinfo(CacheRecordPtr cachePtr, ApiConnectRecord* regApiPtr);
2199 void releaseKeys(CacheRecord* regCachePtr);
2200 void releaseDirtyRead(Signal*, ApiConnectRecordPtr, TcConnectRecord*);
2201 void releaseDirtyWrite(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2202 void releaseTcCon();
2203 void releaseTcConnectFail(Signal* signal);
2204 void releaseTransResources(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2205 void checkPoolShrinkNeed(Uint32 pool_index, const TransientFastSlotPool& pool);
2206 void sendPoolShrink(Uint32 pool_index);
2207 bool seizeApiConnect(Signal* signal, ApiConnectRecordPtr& apiConnectptr);
2208 bool seizeApiConnectCopy(Signal* signal, ApiConnectRecord* regApiPtr);
2209 bool seizeApiConnectFail(Signal* signal, ApiConnectRecordPtr& apiConnectptr);
2210 [[noreturn]] void crash_gcp(Uint32 line, const char msg[]);
2211 void seizeGcp(Ptr<GcpRecord> & dst, Uint64 gci);
2212 void seizeTcConnectFail(Signal* signal);
2213 Ptr<ApiConnectRecord> sendApiCommitAndCopy(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2214 void sendApiCommitSignal(Signal* signal, Ptr<ApiConnectRecord>);
2215 void sendApiLateCommitSignal(Signal* signal, Ptr<ApiConnectRecord> apiCopy);
2216 bool sendAttrInfoTrain(Signal* signal,
2217 UintR TBRef,
2218 Uint32 connectPtr,
2219 Uint32 offset,
2220 Uint32 attrInfoIVal,
2221 ApiConnectRecord* regApiPtr);
2222 void sendContinueTimeOutControl(Signal* signal, Uint32 TapiConPtr);
2223 void sendlqhkeyreq(Signal* signal,
2224 BlockReference TBRef,
2225 CacheRecord*,
2226 ApiConnectRecord* regApiPtr);
2227 void sendSystemError(Signal* signal, int line);
2228 void sendtckeyconf(Signal* signal, UintR TcommitFlag, ApiConnectRecordPtr apiConnectptr);
2229 void unlinkApiConnect(Ptr<GcpRecord>, Ptr<ApiConnectRecord>);
2230 void unlinkAndReleaseGcp(Ptr<GcpRecord>);
2231 void unlinkReadyTcCon(ApiConnectRecord* regApiPtr);
2232 void handleFailedOperation(Signal* signal,
2233 const LqhKeyRef * const lqhKeyRef,
2234 bool gotLqhKeyRef);
2235 void markOperationAborted(ApiConnectRecord * const regApiPtr,
2236 TcConnectRecord * const regTcPtr);
2237 void clearCommitAckMarker(ApiConnectRecord * const regApiPtr,
2238 TcConnectRecord * const regTcPtr);
2239 // Trigger and index handling
2240 int saveINDXKEYINFO(Signal* signal,
2241 TcIndexOperation* indexOp,
2242 const Uint32 *src,
2243 Uint32 len);
2244 bool receivedAllINDXKEYINFO(TcIndexOperation* indexOp);
2245 int saveINDXATTRINFO(Signal* signal,
2246 TcIndexOperation* indexOp,
2247 const Uint32 *src,
2248 Uint32 len);
2249 bool receivedAllINDXATTRINFO(TcIndexOperation* indexOp);
2250 Uint32 saveTRANSID_AI(Signal* signal,
2251 TcIndexOperation* indexOp,
2252 const Uint32 *src,
2253 Uint32 len);
2254 bool receivedAllTRANSID_AI(TcIndexOperation* indexOp);
2255 void readIndexTable(Signal* signal,
2256 ApiConnectRecordPtr transPtr,
2257 TcIndexOperation* indexOp,
2258 Uint32 special_op_flags);
2259 void executeIndexOperation(Signal* signal,
2260 ApiConnectRecord* regApiPtr,
2261 TcIndexOperation* indexOp);
2262 bool seizeIndexOperation(ApiConnectRecord* regApiPtr,
2263 TcIndexOperationPtr& indexOpPtr);
2264 void releaseIndexOperation(ApiConnectRecord* regApiPtr,
2265 TcIndexOperation* indexOp);
2266 void releaseAllSeizedIndexOperations(ApiConnectRecord* regApiPtr);
2267 void setupIndexOpReturn(ApiConnectRecord* regApiPtr,
2268 TcConnectRecord* regTcPtr);
2269
2270 void saveTriggeringOpState(Signal* signal,
2271 TcConnectRecord* trigOp);
2272 void restoreTriggeringOpState(Signal* signal,
2273 TcConnectRecord* trigOp);
2274 void trigger_op_finished(Signal* signal,
2275 ApiConnectRecordPtr,
2276 Uint32 triggerPtrI,
2277 TcConnectRecord* triggeringOp,
2278 Uint32 returnCode);
2279 void continueTriggeringOp(Signal* signal,
2280 TcConnectRecord* trigOp,
2281 ApiConnectRecordPtr);
2282
2283 void executeTriggers(Signal* signal, ApiConnectRecordPtr const* transPtr);
2284 void waitToExecutePendingTrigger(Signal* signal, ApiConnectRecordPtr transPtr);
2285 bool executeTrigger(Signal* signal,
2286 TcFiredTriggerData* firedTriggerData,
2287 ApiConnectRecordPtr const* transPtr,
2288 TcConnectRecordPtr* opPtr);
2289 void executeIndexTrigger(Signal* signal,
2290 TcDefinedTriggerData* definedTriggerData,
2291 TcFiredTriggerData* firedTriggerData,
2292 ApiConnectRecordPtr const* transPtr,
2293 TcConnectRecordPtr* opPtr);
2294 Uint32 appendDataToSection(Uint32& sectionIVal,
2295 AttributeBuffer & src,
2296 AttributeBuffer::DataBufferIterator & iter,
2297 Uint32 len);
2298 bool appendAttrDataToSection(Uint32& sectionIVal,
2299 AttributeBuffer& values,
2300 bool withHeaders,
2301 Uint32& attrId,
2302 bool& hasNull);
2303 void insertIntoIndexTable(Signal* signal,
2304 TcFiredTriggerData* firedTriggerData,
2305 ApiConnectRecordPtr const* transPtr,
2306 TcConnectRecordPtr* opPtr,
2307 TcIndexData* indexData);
2308 void deleteFromIndexTable(Signal* signal,
2309 TcFiredTriggerData* firedTriggerData,
2310 ApiConnectRecordPtr const* transPtr,
2311 TcConnectRecordPtr* opPtr,
2312 TcIndexData* indexData);
2313
2314 void executeReorgTrigger(Signal* signal,
2315 TcDefinedTriggerData* definedTriggerData,
2316 TcFiredTriggerData* firedTriggerData,
2317 ApiConnectRecordPtr const* transPtr,
2318 TcConnectRecordPtr* opPtr);
2319
2320 void executeFKParentTrigger(Signal* signal,
2321 TcDefinedTriggerData* definedTriggerData,
2322 TcFiredTriggerData* firedTriggerData,
2323 ApiConnectRecordPtr const* transPtr,
2324 TcConnectRecordPtr* opPtr);
2325
2326 void executeFKChildTrigger(Signal* signal,
2327 TcDefinedTriggerData* definedTriggerData,
2328 TcFiredTriggerData* firedTriggerData,
2329 ApiConnectRecordPtr const* transPtr,
2330 TcConnectRecordPtr* opPtr);
2331
2332 void fk_readFromParentTable(Signal* signal,
2333 TcFiredTriggerData* firedTriggerData,
2334 ApiConnectRecordPtr const* transPtr,
2335 TcConnectRecordPtr* opPtr,
2336 TcFKData* fkData);
2337 void fk_readFromChildTable(Signal* signal,
2338 TcFiredTriggerData* firedTriggerData,
2339 ApiConnectRecordPtr const* transPtr,
2340 TcConnectRecordPtr* opPtr,
2341 TcFKData* fkData,
2342 Uint32 op, Uint32 attrValuesPtrI);
2343
2344 Uint32 fk_buildKeyInfo(Uint32& keyInfoPtrI, bool& hasNull,
2345 LocalAttributeBuffer& values,
2346 TcFKData* fkData,
2347 bool parent);
2348
2349 void fk_execTCINDXREQ(Signal*, ApiConnectRecordPtr, TcConnectRecordPtr,
2350 Uint32 operation);
2351 void fk_scanFromChildTable(Signal* signal,
2352 TcFiredTriggerData* firedTriggerData,
2353 ApiConnectRecordPtr const* transPtr,
2354 Uint32 opPtrI,
2355 TcFKData* fkData,
2356 Uint32 op, Uint32 attrValuesPtrI);
2357 void fk_scanFromChildTable_done(Signal* signal,
2358 TcConnectRecordPtr,
2359 ApiConnectRecordPtr);
2360 void fk_scanFromChildTable_abort(Signal* signal,
2361 TcConnectRecordPtr,
2362 ApiConnectRecordPtr);
2363
2364 void execSCAN_TABREF(Signal*);
2365 void execSCAN_TABCONF(Signal*);
2366 void execKEYINFO20(Signal*);
2367
2368 Uint32 fk_buildBounds(SegmentedSectionPtr & dst,
2369 LocalAttributeBuffer & src,
2370 TcFKData* fkData);
2371 Uint32 fk_constructAttrInfoSetNull(const TcFKData*);
2372 Uint32 fk_constructAttrInfoUpdateCascade(const TcFKData*,
2373 AttributeBuffer::Head&);
2374
2375 bool executeFullyReplicatedTrigger(Signal* signal,
2376 TcDefinedTriggerData* definedTriggerData,
2377 TcFiredTriggerData* firedTriggerData,
2378 ApiConnectRecordPtr const* transPtr,
2379 TcConnectRecordPtr* opPtr);
2380
2381 void releaseFiredTriggerData(Local_TcFiredTriggerData_fifo::Head* triggers);
2382 void abortTransFromTrigger(Signal* signal,
2383 ApiConnectRecordPtr transPtr,
2384 Uint32 error);
2385 // Generated statement blocks
2386 void warningHandlerLab(Signal* signal, int line);
2387 [[noreturn]] void systemErrorLab(Signal* signal, int line);
2388 void sendSignalErrorRefuseLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2389 void scanTabRefLab(Signal* signal, Uint32 errCode, ApiConnectRecord* regApiPtr);
2390 void diFcountReqLab(Signal* signal, ScanRecordPtr, ApiConnectRecordPtr);
2391 void signalErrorRefuseLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2392 void abort080Lab(Signal* signal);
2393 void sendKeyInfoTrain(Signal* signal,
2394 BlockReference TBRef,
2395 Uint32 connectPtr,
2396 Uint32 offset,
2397 Uint32 keyInfoIVal,
2398 ApiConnectRecord* const regApiPtr);
2399 void abortScanLab(Signal* signal, ScanRecordPtr, Uint32 errCode,
2400 bool not_started, ApiConnectRecordPtr apiConnectptr);
2401 void sendAbortedAfterTimeout(Signal* signal, int Tcheck, ApiConnectRecordPtr apiConnectptr);
2402 void abort010Lab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2403 void abort015Lab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2404 void packLqhkeyreq(Signal* signal,
2405 BlockReference TBRef,
2406 CacheRecordPtr,
2407 ApiConnectRecordPtr apiConnectptr);
2408 void packLqhkeyreq040Lab(Signal* signal,
2409 BlockReference TBRef,
2410 CacheRecordPtr,
2411 ApiConnectRecordPtr apiConnectptr);
2412 void returnFromQueuedDeliveryLab(Signal* signal);
2413 void insert_take_over_failed_node(Signal*, Uint32 failedNodeId);
2414 void startTakeOverLab(Signal* signal,
2415 Uint32 instanceId,
2416 Uint32 failedNodeId);
2417 void toCompleteHandlingLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2418 void toCommitHandlingLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2419 void toAbortHandlingLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2420 void abortErrorLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2421 void nodeTakeOverCompletedLab(Signal* signal,
2422 NodeId nodeId,
2423 Uint32 maxInstanceId);
2424 void ndbsttorry010Lab(Signal* signal);
2425 void commit020Lab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2426 void complete010Lab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2427 void releaseAtErrorLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2428 void appendToSectionErrorLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2429 void scanKeyinfoLab(Signal* signal, CacheRecord*, ApiConnectRecordPtr);
2430 void scanAttrinfoLab(Signal* signal, UintR Tlen, ApiConnectRecordPtr apiConnectptr);
2431 void attrinfoDihReceivedLab(Signal* signal, CacheRecordPtr cachePtr, ApiConnectRecordPtr apiConnectptr);
2432 void aiErrorLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2433 void scanReleaseResourcesLab(Signal* signal);
2434 void scanCompletedLab(Signal* signal);
2435 void scanError(Signal* signal, ScanRecordPtr, Uint32 errorCode);
2436 void diverify010Lab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2437 void intstartphase3x010Lab(Signal* signal);
2438 void sttorryLab(Signal* signal);
2439 void abortBeginErrorLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2440 void tabStateErrorLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2441 void wrongSchemaVersionErrorLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2442 void noFreeConnectionErrorLab(Signal* signal, ApiConnectRecordPtr apiConnectptr);
2443 void tckeyreq050Lab(Signal* signal, CacheRecordPtr cachePtr, ApiConnectRecordPtr apiConnectptr);
2444 void timeOutFoundLab(Signal* signal, UintR anAdd, Uint32 errCode);
2445 void completeTransAtTakeOverLab(Signal* signal, UintR TtakeOverInd);
2446 void completeTransAtTakeOverDoLast(Signal* signal, UintR TtakeOverInd);
2447 void completeTransAtTakeOverDoOne(Signal* signal, UintR TtakeOverInd, ApiConnectRecordPtr apiConnectptr);
2448 void timeOutLoopStartLab(Signal* signal, Uint32 apiConnectPtr);
2449 void initialiseRecordsLab(Signal* signal, UintR Tdata0, Uint32, Uint32);
2450 void tckeyreq020Lab(Signal* signal, CacheRecordPtr cachePtr, ApiConnectRecordPtr apiConnectptr);
2451 void intstartphase1x010Lab(Signal* signal, NodeId nodeId);
2452 void startphase1x010Lab(Signal* signal);
2453
2454 void lqhKeyConf_checkTransactionState(Signal * signal,
2455 ApiConnectRecordPtr apiConnectptr);
2456
2457 void checkDropTab(Signal* signal);
2458
2459 void checkScanActiveInFailedLqh(Signal* signal,
2460 Uint32 scanPtrI,
2461 Uint32 failedNodeId);
2462 void checkScanFragList(Signal*, Uint32 failedNodeId, ScanRecord * scanP,
2463 Local_ScanFragRec_dllist::Head&);
2464
2465 void nodeFailCheckTransactions(Signal*,Uint32 transPtrI,Uint32 failedNodeId);
2466 void ndbdFailBlockCleanupCallback(Signal* signal, Uint32 failedNodeId, Uint32 ignoredRc);
2467 void checkNodeFailComplete(Signal* signal, Uint32 failedNodeId, Uint32 bit);
2468
2469 void apiFailBlockCleanupCallback(Signal* signal, Uint32 failedNodeId, Uint32 ignoredRc);
2470
2471 // Initialisation
2472 void initData();
2473 void initRecords(const ndb_mgm_configuration_iterator * mgm_cfg);
2474
2475 /**
2476 * Functions used at completion of activities tracked for timing.
2477 * Currently we track
2478 * 1) Transactions
2479 * 2) Key operations
2480 * 3) Scan operations
2481 * 4) Execution of SCAN_FRAGREQ's (local scans)
2482 */
2483 void time_track_init_histogram_limits(void);
2484 Uint32 time_track_calculate_histogram_position(NDB_TICKS & start_ticks);
2485
2486 void time_track_complete_scan(ScanRecord * const scanPtr,
2487 Uint32 apiNodeId);
2488 void time_track_complete_scan_error(ScanRecord * const scanPtr,
2489 Uint32 apiNodeId);
2490 void time_track_complete_key_operation(TcConnectRecord * const tcConnectPtr,
2491 Uint32 apiNodeId,
2492 Uint32 dbNodeId);
2493 void time_track_complete_index_key_operation(
2494 TcConnectRecord * const tcConnectPtr,
2495 Uint32 apiNodeId,
2496 Uint32 dbNodeId);
2497 void time_track_complete_key_operation_error(
2498 TcConnectRecord * const tcConnectPtr,
2499 Uint32 apiNodeId,
2500 Uint32 dbNodeId);
2501 void time_track_complete_scan_frag(ScanFragRec * const scanFragPtr);
2502 void time_track_complete_scan_frag_error(ScanFragRec *const scanFragPtr);
2503 void time_track_complete_transaction(ApiConnectRecord *const regApiPtr);
2504 void time_track_complete_transaction_error(
2505 ApiConnectRecord * const regApiPtr);
2506 Uint32 check_own_location_domain(Uint16*, Uint32);
2507 protected:
2508 virtual bool getParam(const char* name, Uint32* count);
2509
2510 private:
2511 Uint32 c_time_track_histogram_boundary[TIME_TRACK_HISTOGRAM_RANGES];
2512 bool c_time_track_activated;
2513 // Transit signals
2514
2515
2516 alignas(64) ApiConnectRecord_pool c_apiConnectRecordPool;
2517 RSS_AP_SNAPSHOT(c_apiConnectRecordPool);
2518
2519 alignas(64) TcConnectRecord_pool tcConnectRecord;
2520 TcConnectRecordPtr tcConnectptr;
2521 UintR ctcConnectFailCount;
2522
2523 alignas(64) CacheRecord_pool c_cacheRecordPool;
2524 RSS_AP_SNAPSHOT(c_cacheRecordPool);
2525
2526 HostRecord *hostRecord;
2527 HostRecordPtr hostptr;
2528 UintR chostFilesize;
2529 NdbNodeBitmask c_alive_nodes;
2530
2531 Uint32 c_ongoing_take_over_cnt;
2532 alignas(64) GcpRecord_pool c_gcpRecordPool;
2533 RSS_AP_SNAPSHOT(c_gcpRecordPool);
2534
2535 TableRecord *tableRecord;
2536 UintR ctabrecFilesize;
2537
2538 UintR thashValue;
2539 UintR tdistrHashValue;
2540
2541 UintR ttransid_ptr;
2542 UintR cfailure_nr;
2543 UintR coperationsize;
2544 UintR ctcTimer;
2545 UintR cDbHbInterval;
2546
2547 Uint32 c_lqhkeyconf_direct_sent;
2548
2549 Uint64 tcheckGcpId;
2550
  // Monotonically increasing counters
2552 struct MonotonicCounters {
2553 Uint64 cattrinfoCount;
2554 Uint64 ctransCount;
2555 Uint64 ccommitCount;
2556 Uint64 creadCount;
2557 Uint64 csimpleReadCount;
2558 Uint64 cwriteCount;
2559 Uint64 cabortCount;
2560 Uint64 c_scan_count;
2561 Uint64 c_range_scan_count;
2562 Uint64 clocalReadCount;
2563 Uint64 clocalWriteCount;
2564
2565 // Resource usage counter(not monotonic)
2566 Uint32 cconcurrentOp;
2567 Uint32 cconcurrentScans;
2568
MonotonicCountersDbtc::MonotonicCounters2569 MonotonicCounters() :
2570 cattrinfoCount(0),
2571 ctransCount(0),
2572 ccommitCount(0),
2573 creadCount(0),
2574 csimpleReadCount(0),
2575 cwriteCount(0),
2576 cabortCount(0),
2577 c_scan_count(0),
2578 c_range_scan_count(0),
2579 clocalReadCount(0),
2580 clocalWriteCount(0),
2581 cconcurrentOp(0) {}
2582
build_event_repDbtc::MonotonicCounters2583 Uint32 build_event_rep(Signal* signal)
2584 {
2585 /*
2586 Read saved value from CONTINUEB, subtract from
2587 counter and write to EVENT_REP
2588 */
2589 const Uint32 attrinfoCount = diff(signal, 1, cattrinfoCount);
2590 const Uint32 transCount = diff(signal, 3, ctransCount);
2591 const Uint32 commitCount = diff(signal, 5, ccommitCount);
2592 const Uint32 readCount = diff(signal, 7, creadCount);
2593 const Uint32 simpleReadCount = diff(signal, 9, csimpleReadCount);
2594 const Uint32 writeCount = diff(signal, 11, cwriteCount);
2595 const Uint32 abortCount = diff(signal, 13, cabortCount);
2596 const Uint32 scan_count = diff(signal, 15, c_scan_count);
2597 const Uint32 range_scan_count = diff(signal, 17, c_range_scan_count);
2598 const Uint32 localread_count = diff(signal, 19, clocalReadCount);
2599 const Uint32 localwrite_count = diff(signal, 21, clocalWriteCount);
2600
2601 signal->theData[0] = NDB_LE_TransReportCounters;
2602 signal->theData[1] = transCount;
2603 signal->theData[2] = commitCount;
2604 signal->theData[3] = readCount;
2605 signal->theData[4] = simpleReadCount;
2606 signal->theData[5] = writeCount;
2607 signal->theData[6] = attrinfoCount;
2608 signal->theData[7] = cconcurrentOp; // Exception that confirms the rule!
2609 signal->theData[8] = abortCount;
2610 signal->theData[9] = scan_count;
2611 signal->theData[10] = range_scan_count;
2612 signal->theData[11] = localread_count;
2613 signal->theData[12] = localwrite_count;
2614 return 13;
2615 }
2616
build_continueBDbtc::MonotonicCounters2617 Uint32 build_continueB(Signal* signal) const
2618 {
2619 /* Save current value of counters to CONTINUEB */
2620 const Uint64* vars[] = {
2621 &cattrinfoCount, &ctransCount, &ccommitCount,
2622 &creadCount, &csimpleReadCount, &cwriteCount,
2623 &cabortCount, &c_scan_count, &c_range_scan_count,
2624 &clocalReadCount, &clocalWriteCount
2625 };
2626 const size_t num = sizeof(vars)/sizeof(vars[0]);
2627
2628 for (size_t i = 0; i < num; i++)
2629 {
2630 signal->theData[1+i*2] = Uint32(*vars[i] >> 32);
2631 signal->theData[1+i*2+1] = Uint32(*vars[i]);
2632 }
2633 return 1 + num * 2;
2634 }
2635 private:
diffDbtc::MonotonicCounters2636 Uint32 diff(Signal* signal, size_t pos, Uint64 curr) const
2637 {
2638 const Uint64 old =
2639 (signal->theData[pos+1] | (Uint64(signal->theData[pos]) << 32));
2640 return (Uint32)(curr - old);
2641 }
2642 } c_counters;
2643 RSS_OP_SNAPSHOT(cconcurrentOp);
2644
2645 Uint32 m_concurrent_overtakeable_operations;
2646
2647 Uint16 cownNodeid;
2648 Uint16 terrorCode;
2649
2650 LocalApiConnectRecord_api_fifo::Head capiConnectPREPARE_TO_COMMITList;
2651
2652 LocalGcpRecord_list::Head c_gcpRecordList;
2653 UintR cConcScanCount;
2654 RSS_OP_SNAPSHOT(cConcScanCount);
2655
2656 TableRecordPtr tabptr;
2657 LocalApiConnectRecord_api_list::Head c_apiConnectFailList;
2658 Uint32 capiConnectFailCount;
2659
2660 BlockReference cdihblockref;
2661 BlockReference cownref; /* OWN BLOCK REFERENCE */
2662
2663 ApiConnectRecordPtr timeOutptr;
2664
2665 alignas(64) ScanRecord_pool scanRecordPool;
2666 UintR cscanrecFileSize;
2667 UintR cscanrecFileSize_original;
2668
2669 alignas(64) ScanFragRec_pool c_scan_frag_pool;
2670 RSS_AP_SNAPSHOT(c_scan_frag_pool);
2671 ScanFragRecPtr scanFragptr;
2672
2673 BlockReference cndbcntrblockref;
2674 BlockInstance cspjInstanceRR; // SPJ instance round-robin counter
2675
2676 Uint16 csignalKey;
2677 Uint16 csystemnodes;
2678 Uint16 cnodes[4];
2679 NodeId cmasterNodeId;
2680 UintR cnoParallelTakeOver;
2681 TimeOutCheckState ctimeOutCheckFragActive;
2682
2683 Uint32 ctimeOutCheckFragCounter;
2684 Uint32 ctimeOutCheckCounter;
2685 Uint32 ctimeOutValue;
2686 Uint32 ctimeOutCheckDelay;
2687 Uint32 ctimeOutCheckDelayScan;
2688 Uint32 ctimeOutCheckHeartbeat;
2689 Uint32 ctimeOutCheckLastHeartbeat;
2690 Uint32 ctimeOutMissedHeartbeats;
2691 Uint32 ctimeOutCheckHeartbeatScan;
2692 Uint32 ctimeOutCheckLastHeartbeatScan;
2693 Uint32 ctimeOutMissedHeartbeatsScan;
2694 Uint32 c_appl_timeout_value;
2695
2696 TimeOutCheckState ctimeOutCheckActive;
2697
2698 Uint64 c_elapsed_time_millis;
2699 NDB_TICKS c_latestTIME_SIGNAL;
2700
2701 BlockReference capiFailRef;
2702 UintR cpackedListIndex;
2703 Uint16 cpackedList[MAX_NODES];
2704 UintR capiConnectClosing[MAX_NODES];
2705 UintR con_lineNodes;
2706
2707 UintR tabortInd;
2708
2709 BlockReference tblockref;
2710
2711 Uint8 tcurrentReplicaNo;
2712
2713 UintR tindex;
2714 UintR tmaxData;
2715
2716 BlockReference tusersblkref;
2717 UintR tuserpointer;
2718
2719 UintR ctransidFailHash[TRANSID_FAIL_HASH_SIZE];
2720 UintR ctcConnectFailHash[TC_FAIL_HASH_SIZE];
2721
2722 /**
2723 * Commit Ack handling
2724 */
2725 public:
2726 struct CommitAckMarker
2727 {
2728 STATIC_CONST( TYPE_ID = RT_DBTC_COMMIT_ACK_MARKER );
2729
CommitAckMarkerDbtc::CommitAckMarker2730 CommitAckMarker()
2731 : m_magic(Magic::make(TYPE_ID)),
2732 apiConnectPtr(RNIL),
2733 apiNodeId(0)
2734 {}
2735
2736 Uint32 m_magic;
2737 Uint32 transid1;
2738 Uint32 transid2;
2739 Uint32 nextHash;
2740 Uint32 prevHash;
2741 Uint32 apiConnectPtr;
2742 Uint16 apiNodeId;
2743 CommitAckMarkerBuffer::Head theDataBuffer;
2744
equalDbtc::CommitAckMarker2745 inline bool equal(const CommitAckMarker & p) const {
2746 return ((p.transid1 == transid1) && (p.transid2 == transid2));
2747 }
2748
hashValueDbtc::CommitAckMarker2749 inline Uint32 hashValue() const {
2750 return transid1;
2751 }
2752 bool insert_in_commit_ack_marker(Dbtc *tc,
2753 Uint32 instanceKey,
2754 NodeId nodeId);
2755 // insert all keys when exact keys not known
2756 bool insert_in_commit_ack_marker_all(Dbtc *tc,
2757 NodeId nodeId);
2758 };
2759
2760 private:
2761 typedef Ptr<CommitAckMarker> CommitAckMarkerPtr;
2762 typedef TransientPool<CommitAckMarker> CommitAckMarker_pool;
2763 STATIC_CONST(DBTC_COMMIT_ACK_MARKER_TRANSIENT_POOL_INDEX = 12);
2764 typedef DLHashTable<CommitAckMarker_pool> CommitAckMarker_hash;
2765 typedef CommitAckMarker_hash::Iterator CommitAckMarkerIterator;
2766
2767 CommitAckMarker_pool m_commitAckMarkerPool;
2768 CommitAckMarker_hash m_commitAckMarkerHash;
2769 RSS_AP_SNAPSHOT(m_commitAckMarkerPool);
2770
2771 static const Uint32 c_transient_pool_count = 13;
2772 TransientFastSlotPool* c_transient_pools[c_transient_pool_count];
2773 Bitmask<1> c_transient_pools_shrinking;
2774
2775 void execTC_COMMIT_ACK(Signal* signal);
2776 void sendRemoveMarkers(Signal*, CommitAckMarker *, Uint32);
2777 void sendRemoveMarker(Signal* signal,
2778 NodeId nodeId,
2779 Uint32 instanceKey,
2780 Uint32 transid1,
2781 Uint32 transid2,
2782 Uint32 removed_by_fail_api);
2783 void removeMarkerForFailedAPI(Signal* signal, NodeId nodeId, Uint32 bucket);
2784
getAllowStartTransaction(NodeId nodeId,Uint32 table_single_user_mode) const2785 bool getAllowStartTransaction(NodeId nodeId, Uint32 table_single_user_mode) const {
2786 if (unlikely(getNodeState().getSingleUserMode()))
2787 {
2788 if (getNodeState().getSingleUserApi() == nodeId || table_single_user_mode)
2789 return true;
2790 else
2791 return false;
2792 }
2793 return getNodeState().startLevel < NodeState::SL_STOPPING_2;
2794 }
2795
2796 void checkAbortAllTimeout(Signal* signal, Uint32 sleepTime);
2797 struct AbortAllRecord {
AbortAllRecordDbtc::AbortAllRecord2798 AbortAllRecord() : clientRef(0), oldTimeOutValue(0) {}
2799 Uint32 clientData;
2800 BlockReference clientRef;
2801
2802 Uint32 oldTimeOutValue;
2803 };
2804 AbortAllRecord c_abortRec;
2805
2806 bool validate_filter(Signal*);
2807 bool match_and_print(Signal*, ApiConnectRecordPtr);
2808 bool ndbinfo_write_trans(Ndbinfo::Row&, ApiConnectRecordPtr);
2809
2810 #ifdef ERROR_INSERT
2811 bool testFragmentDrop(Signal* signal);
2812 #endif
2813
2814 /************************** API CONNECT RECORD ***********************/
2815 /* *******************************************************************/
2816 /* THE API CONNECT RECORD CONTAINS THE CONNECTION RECORD TO WHICH THE*/
2817 /* APPLICATION CONNECTS. THE APPLICATION CAN SEND ONE OPERATION AT A */
2818 /* TIME. IT CAN SEND A NEW OPERATION IMMEDIATELY AFTER SENDING THE */
2819 /* PREVIOUS OPERATION. THEREBY SEVERAL OPERATIONS CAN BE ACTIVE IN */
2820 /* ONE TRANSACTION WITHIN TC. THIS IS ACHIEVED BY USING THE API */
2821 /* CONNECT RECORD. EACH ACTIVE OPERATION IS HANDLED BY THE TC */
2822 /* CONNECT RECORD. AS SOON AS THE TC CONNECT RECORD HAS SENT THE */
2823 /* REQUEST TO THE LQH IT IS READY TO RECEIVE NEW OPERATIONS. THE */
2824 /* LQH CONNECT RECORD TAKES CARE OF WAITING FOR AN OPERATION TO */
2825 /* COMPLETE. WHEN AN OPERATION HAS COMPLETED ON THE LQH CONNECT */
2826 /* RECORD A NEW OPERATION CAN BE STARTED ON THIS LQH CONNECT RECORD. */
2827 /*******************************************************************>*/
2828 /* */
2829 /* API CONNECT RECORD ALIGNED TO BE 256 BYTES */
2830 /*******************************************************************>*/
2831 /************************** TC CONNECT RECORD ************************/
2832 /* *******************************************************************/
2833 /* TC CONNECT RECORD KEEPS ALL INFORMATION TO CARRY OUT A TRANSACTION*/
2834 /* THE TRANSACTION CONTROLLER ESTABLISHES CONNECTIONS TO DIFFERENT */
2835 /* BLOCKS TO CARRY OUT THE TRANSACTION. THERE CAN BE SEVERAL RECORDS */
2836 /* PER ACTIVE TRANSACTION. THE TC CONNECT RECORD COOPERATES WITH THE */
2837 /* API CONNECT RECORD FOR COMMUNICATION WITH THE API AND WITH THE */
2838 /* LQH CONNECT RECORD FOR COMMUNICATION WITH THE LQH'S INVOLVED IN */
2839 /* THE TRANSACTION. TC CONNECT RECORD IS PERMANENTLY CONNECTED TO A */
2840 /* RECORD IN DICT AND ONE IN DIH. IT CONTAINS A LIST OF ACTIVE LQH */
2841 /* CONNECT RECORDS AND A LIST OF STARTED BUT NOT ACTIVE LQH CONNECT */
2842 /* RECORDS. IT DOES ALSO CONTAIN A LIST OF ALL OPERATIONS THAT ARE */
2843 /* EXECUTED WITH THE TC CONNECT RECORD. */
2844 /*******************************************************************>*/
2845 /* TC_CONNECT RECORD ALIGNED TO BE 128 BYTES */
2846 /*******************************************************************>*/
2847 LocalTcConnectRecord_fifo::Head cfreeTcConnectFail;
2848
2849 /* POINTER FOR THE LQH RECORD*/
2850 /* ************************ HOST RECORD ********************************* */
2851 /********************************************************/
2852 /* THIS RECORD CONTAINS ALIVE-STATUS ON ALL NODES IN THE*/
2853 /* SYSTEM */
2854 /********************************************************/
2855 /* THIS RECORD IS ALIGNED TO BE 8 BYTES. */
2856 /********************************************************/
2857 /* ************************ TABLE RECORD ******************************** */
2858 /********************************************************/
2859 /* THIS RECORD CONTAINS THE CURRENT SCHEMA VERSION OF */
2860 /* ALL TABLES IN THE SYSTEM. */
2861 /********************************************************/
2862 /*-------------------------------------------------------------------------*/
2863 /* THE TC CONNECTION USED BY THIS SCAN. */
2864 /*-------------------------------------------------------------------------*/
2865 /*-------------------------------------------------------------------------*/
2866 /* LENGTH READ FOR A PARTICULAR SCANNED OPERATION. */
2867 /*-------------------------------------------------------------------------*/
2868 /*-------------------------------------------------------------------------*/
2869 /* REFERENCE TO THE SCAN RECORD FOR THIS SCAN PROCESS. */
2870 /*-------------------------------------------------------------------------*/
2871 /* *********************************************************************** */
2872 /* ******$ DATA BUFFER ******$ */
2873 /* */
2874 /* THIS BUFFER IS USED AS A GENERAL DATA STORAGE. */
2875 /* *********************************************************************** */
2876 /* *********************************************************************** */
2877 /* ******$ ATTRIBUTE INFORMATION RECORD ******$ */
2878 /*
2879 CAN CONTAIN ONE (1) ATTRINFO SIGNAL. ONE SIGNAL CONTAINS 24 ATTR.
2880 INFO WORDS. BUT 32 ELEMENTS ARE USED TO MAKE PLEX HAPPY.
2881 SOME OF THE ELEMENTS ARE USED TO THE FOLLOWING THINGS:
2882   DATA LENGTH IN THIS RECORD IS STORED IN THE ELEMENT INDEXED BY
2883 ZINBUF_DATA_LEN.
2884 NEXT FREE ATTRBUF IS POINTED OUT BY THE ELEMENT INDEXED BY
2885 PREVIOUS ATTRBUF IS POINTED OUT BY THE ELEMENT INDEXED BY ZINBUF_PREV
2886 (NOT USED YET).
2887 NEXT ATTRBUF IS POINTED OUT BY THE ELEMENT INDEXED BY ZINBUF_NEXT.
2888 */
2889 /* ********************************************************************** */
2890 /**************************************************************************/
2891 /* GLOBAL CHECKPOINT INFORMATION RECORD */
2892 /* */
2893 /* THIS RECORD IS USED TO STORE THE GCP NUMBER AND A COUNTER */
2894 /* DURING THE COMPLETION PHASE OF THE TRANSACTION */
2895 /**************************************************************************/
2896 /* */
2897 /* GCP RECORD ALIGNED TO BE 32 BYTES */
2898 /**************************************************************************/
2899 /**************************************************************************/
2900 /* TC_FAIL_RECORD */
2901 /* THIS RECORD IS USED WHEN HANDLING TAKE OVER OF ANOTHER FAILED TC NODE.*/
2902 /**************************************************************************/
2903 TcFailRecord *tcFailRecord;
2904 TcFailRecordPtr tcNodeFailptr;
2905 /**************************************************************************/
2906 // Temporary variables that are not allowed to use for storage between
2907 // signals. They
2908 // can only be used in a signal to transfer values between subroutines.
2909 // In the long run
2910 // those variables should be removed and exchanged for stack
2911 // variable communication.
2912 /**************************************************************************/
2913
2914 Uint32 c_gcp_ref;
2915 Uint32 c_gcp_data;
2916
2917 Uint32 c_sttor_ref;
2918
2919 Uint32 m_load_balancer_location;
2920
2921 #ifdef ERROR_INSERT
2922 // Used with ERROR_INSERT 8078 + 8079 to check API_FAILREQ handling
2923 Uint32 c_lastFailedApi;
2924 #endif
2925 Uint32 m_deferred_enabled;
2926 Uint32 m_max_writes_per_trans;
2927 Uint32 m_take_over_operations;
2928 #endif
2929
2930 #ifndef DBTC_STATE_EXTRACT
2931 void dump_trans(ApiConnectRecordPtr transPtr);
2932 bool hasOp(ApiConnectRecordPtr transPtr, Uint32 op);
2933
2934 public:
2935 static Uint64 getTransactionMemoryNeed(
2936 const Uint32 dbtc_instance_count,
2937 const ndb_mgm_configuration_iterator * mgm_cfg,
2938 const bool use_reserved);
2939 #endif
2940 };
2941
2942 #ifndef DBTC_STATE_EXTRACT
checkPoolShrinkNeed(const Uint32 pool_index,const TransientFastSlotPool & pool)2943 inline void Dbtc::checkPoolShrinkNeed(const Uint32 pool_index,
2944 const TransientFastSlotPool& pool)
2945 {
2946 #if defined(VM_TRACE) || defined(ERROR_INSERT)
2947 ndbrequire(pool_index < c_transient_pool_count);
2948 ndbrequire(c_transient_pools[pool_index] == &pool);
2949 #endif
2950 if (pool.may_shrink())
2951 {
2952 sendPoolShrink(pool_index);
2953 }
2954 }
2955
2956 #endif
2957
2958 #ifdef DBTC_MAIN
setApiConTimer(ApiConnectRecordPtr apiConPtr,Uint32 value,Uint32 line)2959 void Dbtc::setApiConTimer(ApiConnectRecordPtr apiConPtr, Uint32 value, Uint32 line)
2960 {
2961 const Uint32 apiConTimer = apiConPtr.p->m_apiConTimer;
2962 ApiConTimersPtr apiConTimers;
2963 ndbrequire(apiConTimer != RNIL);
2964 apiConTimers.i = apiConTimer >> ApiConTimers::INDEX_BITS;
2965 c_apiConTimersPool.getPtr(apiConTimers);
2966 const Uint32 timer_index = apiConTimer & ApiConTimers::INDEX_MASK;
2967 ndbassert(timer_index < apiConTimers.p->m_top);
2968 ndbassert(apiConTimers.p->m_count > 0);
2969 ndbassert(apiConTimers.p->m_entries[timer_index].m_apiConnectRecord ==
2970 apiConPtr.i);
2971
2972 apiConTimers.p->m_entries[timer_index].m_timer = value;
2973 apiConPtr.p->m_apiConTimer_line = line;
2974 }
2975 #endif
2976
2977 #undef JAM_FILE_ID
2978
2979 #endif
2980