1 /*
2 Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #ifndef BACKUP_H
26 #define BACKUP_H
27
28 #include <ndb_limits.h>
29 #include <SimulatedBlock.hpp>
30
31 #include "FsBuffer.hpp"
32 #include "BackupFormat.hpp"
33
34 #include <NodeBitmask.hpp>
35 #include <SimpleProperties.hpp>
36
37 #include <IntrusiveList.hpp>
38 #include <SignalCounter.hpp>
39 #include <blocks/mutexes.hpp>
40
41 #include <NdbTCP.h>
42 #include <NdbTick.h>
43 #include <Array.hpp>
44 #include <Mutex.hpp>
45
46 #include <signaldata/RedoStateRep.hpp>
47 #include "../dblqh/Dblqh.hpp"
48 #include <signaldata/BackupSignalData.hpp>
49
50 #define JAM_FILE_ID 474
51
52
53 /**
54 * Backup - This block manages database backup and restore
55 */
56 class Backup : public SimulatedBlock
57 {
58 friend class BackupProxy;
59
60 public:
61 Backup(Block_context& ctx, Uint32 instanceNumber = 0);
62 virtual ~Backup();
63 BLOCK_DEFINES(Backup);
64
65 class Dblqh* c_lqh;
66 class Dbtup* c_tup;
67 class Lgman* c_lgman;
68 class Pgman* c_pgman;
69
/**
 * Index values used for this block's callback entries.
 * COUNT_CALLBACKS is also used as the dimension of m_callbackEntry.
 */
enum CallbackIndex
{
  SYNC_LOG_LCP_LSN = 1, // lgman
  // pgman
  COUNT_CALLBACKS = 2
};
76 CallbackEntry m_callbackEntry[COUNT_CALLBACKS];
77 CallbackTable m_callbackTable;
78
79 protected:
80
81 void execSTTOR(Signal* signal);
82 void execREDO_STATE_REP(Signal*);
83 void execREAD_CONFIG_REQ(Signal* signal);
84 void execDUMP_STATE_ORD(Signal* signal);
85 void execREAD_NODESCONF(Signal* signal);
86 void execNODE_FAILREP(Signal* signal);
87 void execINCL_NODEREQ(Signal* signal);
88 void execCONTINUEB(Signal* signal);
89
90 /**
91 * Testing
92 */
93 void execBACKUP_REF(Signal* signal);
94 void execBACKUP_CONF(Signal* signal);
95 void execBACKUP_ABORT_REP(Signal* signal);
96 void execBACKUP_COMPLETE_REP(Signal* signal);
97
98 /**
99 * Signals sent from master
100 */
101 void execDEFINE_BACKUP_REQ(Signal* signal);
102 void execBACKUP_DATA(Signal* signal);
103 void execSTART_BACKUP_REQ(Signal* signal);
104 void execBACKUP_FRAGMENT_REQ(Signal* signal);
105 void execBACKUP_FRAGMENT_COMPLETE_REP(Signal* signal);
106 void execSTOP_BACKUP_REQ(Signal* signal);
107 void execBACKUP_STATUS_REQ(Signal* signal);
108 void execABORT_BACKUP_ORD(Signal* signal);
109
110 /**
111 * The actual scan
112 */
113 void execSCAN_HBREP(Signal* signal);
114 void execTRANSID_AI(Signal* signal);
115 void execSCAN_FRAGREF(Signal* signal);
116 void execSCAN_FRAGCONF(Signal* signal);
117
118 /**
119 * Trigger logging
120 */
121 void execBACKUP_TRIG_REQ(Signal* signal);
122 void execTRIG_ATTRINFO(Signal* signal);
123 void execFIRE_TRIG_ORD(Signal* signal);
124
125 /**
126 * DICT signals
127 */
128 void execLIST_TABLES_CONF(Signal* signal);
129 void execGET_TABINFOREF(Signal* signal);
130 void execGET_TABINFO_CONF(Signal* signal);
131 void execCREATE_TRIG_IMPL_REF(Signal* signal);
132 void execCREATE_TRIG_IMPL_CONF(Signal* signal);
133 void execDROP_TRIG_IMPL_REF(Signal* signal);
134 void execDROP_TRIG_IMPL_CONF(Signal* signal);
135
136 /**
137 * DIH signals
138 */
139 void execDIH_SCAN_TAB_CONF(Signal* signal);
140 void execCHECK_NODE_RESTARTCONF(Signal*);
141
142 /**
143 * FS signals
144 */
145 void execFSOPENREF(Signal* signal);
146 void execFSOPENCONF(Signal* signal);
147
148 void execFSCLOSEREF(Signal* signal);
149 void execFSCLOSECONF(Signal* signal);
150
151 void execFSAPPENDREF(Signal* signal);
152 void execFSAPPENDCONF(Signal* signal);
153
154 void execFSREMOVEREF(Signal* signal);
155 void execFSREMOVECONF(Signal* signal);
156
157 void execFSREADREF(Signal* signal);
158 void execFSREADCONF(Signal* signal);
159
160 void execFSWRITEREF(Signal* signal);
161 void execFSWRITECONF(Signal* signal);
162
163 /**
164 * Master functionality
165 */
166 void execBACKUP_REQ(Signal* signal);
167 void execABORT_BACKUP_REQ(Signal* signal);
168
169 void execDEFINE_BACKUP_REF(Signal* signal);
170 void execDEFINE_BACKUP_CONF(Signal* signal);
171
172 void execSTART_BACKUP_REF(Signal* signal);
173 void execSTART_BACKUP_CONF(Signal* signal);
174
175 void execBACKUP_FRAGMENT_REF(Signal* signal);
176 void execBACKUP_FRAGMENT_CONF(Signal* signal);
177
178 void execSTOP_BACKUP_REF(Signal* signal);
179 void execSTOP_BACKUP_CONF(Signal* signal);
180
181 void execBACKUP_STATUS_CONF(Signal* signal);
182
183 void execUTIL_SEQUENCE_REF(Signal* signal);
184 void execUTIL_SEQUENCE_CONF(Signal* signal);
185
186 void execWAIT_GCP_REF(Signal* signal);
187 void execWAIT_GCP_CONF(Signal* signal);
188 void execBACKUP_LOCK_TAB_CONF(Signal *signal);
189 void execBACKUP_LOCK_TAB_REF(Signal *signal);
190
191 void execLCP_PREPARE_REQ(Signal* signal);
192 void execLCP_FRAGMENT_REQ(Signal*);
193 void execSYNC_PAGE_WAIT_REP(Signal*);
194 void execSYNC_PAGE_CACHE_CONF(Signal*);
195 void execSYNC_EXTENT_PAGES_CONF(Signal*);
196 void execEND_LCPREQ(Signal* signal);
197 void execINFORM_BACKUP_DROP_TAB_REQ(Signal*);
198 void execWAIT_LCP_IDLE_REQ(Signal*);
199
200 void execDBINFO_SCANREQ(Signal *signal);
201
202 void execRESTORABLE_GCI_REP(Signal *signal);
203
204 void execLCP_STATUS_REQ(Signal* signal);
205
206 private:
207 void defineBackupMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal);
208 void dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal);
209 void startDropTrig_synced(Signal* signal, Uint32 ptrI, Uint32 retVal);
210
211 public:
212 struct Node {
213 Uint32 nodeId;
214 Uint32 alive;
215 Uint32 nextList;
216 union { Uint32 prevList; Uint32 nextPool; };
217 };
218 typedef Ptr<Node> NodePtr;
219 typedef ArrayPool<Node> Node_pool;
220 typedef SLList<Node_pool> Node_list;
221
222 void update_lcp_pages_scanned(Signal *signal,
223 Uint32 filePtrI,
224 Uint32 scanned_pages,
225 Uint32 & scanGCI,
226 bool & changed_row_page_flag);
227
is_partial_lcp_enabled()228 bool is_partial_lcp_enabled()
229 {
230 return m_enable_partial_lcp != 0;
231 }
is_redo_control_enabled()232 bool is_redo_control_enabled()
233 {
234 return m_enable_redo_control != 0;
235 }
236
get_recovery_work()237 Uint32 get_recovery_work()
238 {
239 return m_recovery_work;
240 }
get_insert_recovery_work()241 Uint32 get_insert_recovery_work()
242 {
243 return m_insert_recovery_work;
244 }
245 void init_extended_lcp_stat();
246 void print_extended_lcp_stat();
247 void alloc_page_after_lcp_start(Uint32 page_no);
248 void alloc_dropped_page_after_lcp_start(bool is_change_page);
249 void dropped_page_after_lcp_start(bool is_change_page,
250 bool is_last_lcp_state_A);
251 void skip_page_lcp_scanned_bit();
252 void skip_no_change_page();
253 void skip_empty_page_lcp();
254 void record_dropped_empty_page_lcp();
255 void record_late_alloc_page_lcp();
256 void page_to_skip_lcp(bool is_last_lcp_state_A);
257 void lcp_keep_row();
258 void lcp_keep_delete_row();
259 void lcp_keep_delete_by_page_id();
260
261 void init_lcp_scan(Uint32 & scanGCI,
262 bool & changed_row_page_flag);
263 void end_lcp_scan(Uint32 number_of_pages);
264
265 void record_deleted_rowid(Uint32 pageNo,
266 Uint32 pageIndex,
267 Uint32 gci);
268
269 void record_deleted_pageid(Uint32 pageNo, Uint32 record_size);
270
271 int is_page_lcp_scanned(Uint32 page_id, bool & all_part);
272 Uint32 hash_lcp_part(Uint32 page_id) const;
273
274 #define BACKUP_WORDS_PER_PAGE 8191
275 struct Page32 {
276 Uint32 data[BACKUP_WORDS_PER_PAGE];
277 Uint32 nextPool;
278 };
279 typedef Ptr<Page32> Page32Ptr;
280 typedef ArrayPool<Page32> Page32_pool;
281
282 struct Fragment {
283 Uint64 noOfRecords;
284 Uint32 tableId;
285 Uint32 createGci;
286 Uint16 node;
287 Uint16 fragmentId;
288 Uint8 lqhInstanceKey;
289 Uint8 scanned; // 0 = not scanned x = scanned by node x
290 Uint8 scanning; // 0 = not scanning x = scanning on node x
291 Uint8 firstFragment;
292 Uint32 nextPool;
293 };
294 typedef Ptr<Fragment> FragmentPtr;
295 typedef ArrayPool<Fragment> Fragment_pool;
296
297 struct DeleteLcpFile
298 {
299 Uint64 lcpLsn;
300 Uint32 tableId;
301 Uint32 fragmentId;
302 Uint32 firstFileId;
303 Uint32 lastFileId;
304 Uint32 waitCompletedGci;
305 Uint32 lcpCtlFileNumber;
306 Uint32 validFlag;
307 union
308 {
309 Uint32 nextPool;
310 Uint32 nextList;
311 };
312 Uint32 prevList;
313 };
314 typedef Ptr<DeleteLcpFile> DeleteLcpFilePtr;
315 typedef ArrayPool<DeleteLcpFile> DeleteLcpFile_pool;
316 typedef DLCFifoList<DeleteLcpFile_pool> DeleteLcpFile_list;
317 typedef LocalDLCFifoList<DeleteLcpFile_pool>
318 LocalDeleteLcpFile_list;
319 DeleteLcpFile_list::Head m_delete_lcp_file_head;
320
321 Uint32 m_newestRestorableGci;
322 bool m_delete_lcp_files_ongoing;
323 Uint32 m_enable_redo_control;
324 Uint32 m_enable_partial_lcp;
325 Uint32 m_recovery_work;
326 Uint32 m_insert_recovery_work;
327
328 Uint32 m_cfg_mt_backup;
329 bool m_skew_disk_speed;
330
331 struct Table {
332 Table(Fragment_pool &);
333
334 Uint64 noOfRecords;
335
336 Uint32 tableId;
337 Uint32 backupPtrI;
338 Uint32 schemaVersion;
339 Uint32 tableType;
340 Uint32 m_scan_cookie;
341 Uint32 triggerIds[3];
342 bool triggerAllocated[3];
343 Uint32 maxRecordSize;
344 Uint32 attrInfoLen;
345 Uint32 noOfAttributes;
346 /**
347 * AttributeHeader::READ_PACKED + full mask + ( DISKREF ROWID ROWGCI )
348 */
349 Uint32 attrInfo[1+MAXNROFATTRIBUTESINWORDS+3];
350
351 Array<Fragment> fragments;
352
353 Uint32 nextList;
354 union { Uint32 nextPool; Uint32 prevList; };
355 /**
356 * Pointer used by c_tableMap
357 */
358 Uint32 nextMapTable;
359 };
360 typedef Ptr<Table> TablePtr;
361 typedef ArrayPool<Table> Table_pool;
362 typedef SLList<Table_pool> Table_list;
363 typedef DLCFifoList<Table_pool> Table_fifo;
364
365 struct OperationRecord {
366 public:
OperationRecordBackup::OperationRecord367 OperationRecord(Backup & b) : backup(b) {}
368
369 /**
370 * Once per table
371 */
372 void init(const TablePtr & ptr);
373
374 /**
375 * Once per fragment
376 */
377 bool newFragment(Uint32 tableId, Uint32 fragNo);
378 bool fragComplete(Uint32 tableId, Uint32 fragNo, bool fill_record);
379
380 /**
381 * Once per scan frag (next) req/conf
382 */
383 bool newScan();
384 void scanConf(Uint32 noOfOps, Uint32 opLen, Uint32 buffer_data_len);
385 Uint32 publishBufferData();
386 void closeScan();
387
388 /**
389 * Per record
390 */
391 void newRecord(Uint32 * base);
392 void finished(Uint32 len);
393 void set_scanned_pages(Uint32 num_scanned_pages);
394
395 private:
396 Uint32* base;
397 Uint32 opNoDone;
398 Uint32 opNoConf;
399 Uint32 opLen;
400
401 public:
402 Uint32* dst;
403 Uint32 tablePtr; // Ptr.i to current table
404
405 FsBuffer dataBuffer;
406 Uint64 noOfRecords;
407 Uint64 noOfBytes;
408 Uint32 maxRecordSize;
409 Uint32 lcpScannedPages;
410
411 /*
412 keeps track of total written into backup file to be able to show
413 backup status
414 */
415 Uint64 m_records_total;
416 Uint64 m_bytes_total;
417
418 private:
419 Uint32* scanStart;
420 Uint32* scanStop;
421
422 public:
423 union { Uint32 nextPool; Uint32 nextList; };
424 Uint32 prevList;
425 private:
426
427 Backup & backup;
numberBackup::OperationRecord428 BlockNumber number() const { return backup.number(); }
jamBufferBackup::OperationRecord429 EmulatedJamBuffer *jamBuffer() const { return backup.jamBuffer(); }
progErrorBackup::OperationRecord430 [[noreturn]] void progError(int line,
431 int cause,
432 const char * extra,
433 const char * check)
434 {
435 backup.progError(line, cause, extra, check);
436 }
437 };
438 friend struct OperationRecord;
439
440 struct TriggerRecord {
TriggerRecordBackup::TriggerRecord441 TriggerRecord() { event = ~0;}
442 OperationRecord * operation;
443 BackupFormat::LogFile::LogEntry * logEntry;
444 Uint32 tableId;
445 Uint32 tab_ptr_i;
446 Uint32 event;
447 Uint32 backupPtr;
448 Uint32 errorCode;
449 union { Uint32 nextPool; Uint32 nextList; };
450 };
451 typedef Ptr<TriggerRecord> TriggerPtr;
452 typedef ArrayPool<TriggerRecord> TriggerRecord_pool;
453 typedef SLList<TriggerRecord_pool> TriggerRecord_list;
454
455 /**
456 * BackupFile - At least 3 per backup
457 */
458 struct BackupFile {
BackupFileBackup::BackupFile459 BackupFile(Backup & backup, Page32_pool& pp)
460 : operation(backup), pages(pp) { m_retry_count = 0; }
461
462 Uint32 backupPtr; // Pointer to backup record
463 Uint32 tableId;
464 Uint32 fragmentNo;
465 Uint32 filePointer;
466 Uint32 bytesRead;
467
468 Uint32 m_retry_count;
469 Uint32 errorCode;
470 BackupFormat::FileType fileType;
471 OperationRecord operation;
472
473 Uint64 m_lcp_inserts;
474 Uint64 m_lcp_writes;
475 Uint64 m_lcp_delete_by_pageids;
476 Uint64 m_lcp_delete_by_rowids;
477
478 Array<Page32> pages;
479 Uint32 nextList;
480 union { Uint32 prevList; Uint32 nextPool; };
481
482 enum {
483 BF_OPEN = 0x1
484 ,BF_OPENING = 0x2
485 ,BF_CLOSING = 0x4
486 ,BF_FILE_THREAD = 0x8
487 ,BF_SCAN_THREAD = 0x10
488 ,BF_LCP_META = 0x20
489 ,BF_HEADER_FILE = 0x40
490 ,BF_READING = 0x80
491 ,BF_WRITING = 0x100
492 ,BF_REMOVING = 0x200
493 };
494 Uint32 m_flags;
495 Uint32 m_pos;
496 };
497 typedef Ptr<BackupFile> BackupFilePtr;
498 typedef ArrayPool<BackupFile> BackupFile_pool;
499 typedef SLList<BackupFile_pool> BackupFile_list;
500
501 /**
502 * State for BackupRecord
503 */
/**
 * States a BackupRecord moves through (tracked by CompoundState).
 */
enum State
{
  INITIAL  = 0,
  DEFINING = 1,  // Defining backup content and parameters
  DEFINED  = 2,  // DEFINE_BACKUP_CONF sent in slave, received all in master
  STARTED  = 3,  // Creating triggers
  SCANNING = 4,  // Scanning fragments
  STOPPING = 5,  // Closing files
  CLEANING = 6,  // Cleaning resources
  ABORTING = 7   // Aborting backup
};
514
/**
 * Values stored in BackupRecord::prepareState.
 */
enum PrepareState {
  NOT_ACTIVE              = 0,
  PREPARE_READ_CTL_FILES  = 1,
  PREPARE_OPEN_DATA_FILE  = 2,
  PREPARE_READ_TABLE_DESC = 3,
  PREPARED                = 4,
  PREPARE_ABORTING        = 5,
  PREPARE_DROP            = 6,
  PREPARE_DROP_CLOSE      = 7
};
526
527 static const Uint32 validSlaveTransitionsCount;
528 static const Uint32 validMasterTransitionsCount;
529 static const State validSlaveTransitions[];
530 static const State validMasterTransitions[];
531
532 class CompoundState {
533 public:
CompoundState(Backup & b,const State valid[],Uint32 count,Uint32 _id)534 CompoundState(Backup & b,
535 const State valid[],
536 Uint32 count, Uint32 _id)
537 : backup(b)
538 , validTransitions(valid),
539 noOfValidTransitions(count), id(_id)
540 {
541 state = INITIAL;
542 abortState = state;
543 }
544
545 void setState(State s);
getState() const546 State getState() const { return state;}
getAbortState() const547 State getAbortState() const { return abortState;}
548
549 void forceState(State s);
550
number() const551 BlockNumber number() const { return backup.number(); }
jamBuffer() const552 EmulatedJamBuffer *jamBuffer() const { return backup.jamBuffer(); }
progError(int line,int cause,const char * extra,const char * check)553 [[noreturn]] void progError(int line,
554 int cause,
555 const char * extra,
556 const char * check)
557 {
558 backup.progError(line, cause, extra, check);
559 }
560 private:
561 Backup & backup;
562 State state;
563 State abortState; /**
564 When state == ABORTING, this contains the state
565 when the abort started
566 */
567 const State * validTransitions;
568 const Uint32 noOfValidTransitions;
569 const Uint32 id;
570 };
571 friend class CompoundState;
572
573 /**
574 * Backup record
575 *
576 * One record per backup
577 */
578 struct BackupRecord {
BackupRecordBackup::BackupRecord579 BackupRecord(Backup& b,
580 Table_pool& tp,
581 BackupFile_pool& bp,
582 TriggerRecord_pool& trp)
583 : slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1)
584 , m_first_fragment(false), prepare_table(tp), tables(tp)
585 , triggers(trp), files(bp)
586 , ctlFilePtr(RNIL), logFilePtr(RNIL)
587 , masterData(b), backup(b)
588 {
589 m_wait_end_lcp = false;
590 m_wait_empty_queue = false;
591 m_initial_lcp_started = false;
592 m_wait_gci_to_delete = 0;
593 localLcpId = 0;
594 m_wait_data_file_close = false;
595 m_disk_data_exist = false;
596 m_wait_sync_extent = false;
597 m_wait_final_sync_extent = false;
598 m_wait_disk_data_sync = false;
599 m_num_sync_pages_waiting = 0;
600 m_num_sync_extent_pages_written = 0;
601 /*
602 report of backup status uses these variables to keep track
603 if backup ia running and current state
604 */
605 m_gsn = 0;
606 masterData.gsn = 0;
607 m_informDropTabTableId = Uint32(~0);
608 m_informDropTabReference = Uint32(~0);
609 currentDeleteLcpFile = RNIL;
610 noOfRecords = 0;
611 noOfBytes = 0;
612 for (Uint32 i = 0; i < BackupFormat::NDB_MAX_FILES_PER_LCP; i++)
613 {
614 dataFilePtr[i] = RNIL;
615 prepareDataFilePtr[i] = RNIL;
616 }
617 idleFragWorkerCount = 0;
618 }
619
620 /* prev time backup status was reported */
621 NDB_TICKS m_prev_report;
622
623 bool m_wait_end_lcp;
624 /**
625 * DBLQH have requested us to report when LCP activity ceases.
626 * If this variable is true we are waiting for delete file
627 * queue to become empty to respond that LCP activity is idle.
628 */
629 bool m_wait_empty_queue;
630 bool m_initial_lcp_started;
631 Uint32 m_gsn;
632 Uint32 m_lastSignalId;
633 Uint32 m_prioA_scan_batches_to_execute;
634 CompoundState slaveState;
635
636 bool m_first_fragment;
637 /**
638 * Which header file is used for this LCP, there are only two 0 and 1.
639 *
640 * We record the startPart and the number of parts used in this LCP.
641 *
642 * We record the data file number used for this LCP.
643 *
644 * We record the newestGci that was part of this LCP, this value is
645 * recorded in the header file to ensure that we use the proper set
646 * of LCP files to restore.
647 */
648 Uint32 prepareNextLcpCtlFileNumber;
649 Uint32 prepareDeleteCtlFileNumber;
650 Uint32 prepareMaxGciWritten;
651 PrepareState prepareState;
652 Uint32 prepareFirstDataFileNumber;
653 Uint32 preparePrevLcpId;
654 Uint32 preparePrevLocalLcpId;
655 Uint32 prepareErrorCode;
656 Table_fifo prepare_table;
657 Uint32 m_prepare_scan_change_gci;
658 Uint32 m_prepare_first_start_part_in_lcp;
659 Uint32 m_prepare_num_parts_in_lcp;
660 Uint32 m_prepare_max_parts_in_lcp;
661 BackupFormat::PartPair
662 m_prepare_part_info[BackupFormat::NDB_MAX_LCP_PARTS];
663
664 struct LcpScanInfo
665 {
666 Uint32 m_start_all_part;
667 Uint32 m_num_all_parts;
668 Uint32 m_start_change_part;
669 Uint32 m_num_change_parts;
670 };
671 /**
672 * Handle later, LCP processing.
673 */
674 Uint64 m_row_count;
675 Uint64 m_prev_row_count;
676 Uint64 m_row_change_count;
677 Uint64 m_memory_used_in_bytes;
678 bool m_empty_lcp;
679 bool m_is_lcp_scan_active;
680 bool m_working_changed_row_page_flag;
681 bool m_current_changed_row_page_flag;
682 Uint32 m_outstanding_operations;
683 Uint32 m_first_start_part_in_lcp;
684 Uint32 m_num_parts_in_this_lcp;
685 Uint32 m_num_parts_in_lcp;
686 Uint32 m_max_parts_in_lcp;
687 Uint32 m_lcp_current_page_scanned;
688 Uint32 m_lcp_max_page_cnt;
689 Uint32 m_scan_change_gci;
690 Uint32 m_lcp_remove_files;
691 Uint32 m_num_lcp_files;
692 Uint32 m_num_lcp_data_files_open;
693 Uint32 m_first_data_file_number;
694 Uint32 m_last_data_file_number;
695 Uint32 m_current_data_file_ptr;
696 Uint32 m_working_data_file_ptr;
697 Uint64 m_current_lcp_lsn;
698
699 Uint32 m_save_error_code;
700 Uint32 m_change_page_alloc_after_start;
701 Uint32 m_all_page_alloc_after_start;
702 Uint32 m_change_page_alloc_dropped_after_start;
703 Uint32 m_all_page_alloc_dropped_after_start;
704 Uint32 m_change_page_dropped_A_after_start;
705 Uint32 m_all_page_dropped_A_after_start;
706 Uint32 m_change_page_dropped_D_after_start;
707 Uint32 m_all_page_dropped_D_after_start;
708 Uint32 m_skip_change_page_no_change;
709 Uint32 m_skip_change_page_lcp_scanned_bit;
710 Uint32 m_skip_all_page_lcp_scanned_bit;
711 Uint32 m_skip_empty_change_page;
712 Uint32 m_skip_empty_all_page;
713 Uint32 m_record_empty_change_page_A;
714 Uint32 m_record_late_alloc_change_page_A;
715 Uint32 m_skip_late_alloc_change_page_D;
716 Uint32 m_skip_late_alloc_all_page_A;
717 Uint32 m_skip_late_alloc_all_page_D;
718 Uint64 m_lcp_keep_row_change_pages;
719 Uint64 m_lcp_keep_row_all_pages;
720 Uint64 m_lcp_keep_delete_row_change_pages;
721 Uint64 m_lcp_keep_delete_row_all_pages;
722 Uint32 m_lcp_keep_delete_change_pages;
723 Uint32 m_lcp_keep_delete_all_pages;
724
725 Uint64 m_last_recorded_bytes_written;
726 Uint64 m_pause_counter;
727 Uint64 m_row_scan_counter;
728 NDB_TICKS m_last_delay_scan_timer;
729 NDB_TICKS m_scan_start_timer;
730
731 Uint32 m_num_scan_req_on_prioa;
732
733 bool m_any_lcp_page_ops;
734
735 BackupFormat::PartPair m_part_info[BackupFormat::NDB_MAX_LCP_PARTS];
736 LcpScanInfo m_scan_info[BackupFormat::NDB_MAX_FILES_PER_LCP];
737
738 Uint32 m_informDropTabTableId;
739 BlockReference m_informDropTabReference;
740
741 Uint32 newestGci;
742 Uint32 m_wait_gci_to_delete;
743 Uint32 deleteCtlFileNumber;
744 Uint32 deleteDataFileNumber;
745
746 /* State variables for finalisation of LCP processing of a fragment. */
747 bool m_disk_data_exist;
748 bool m_wait_data_file_close;
749 bool m_wait_disk_data_sync;
750 bool m_wait_sync_extent;
751 bool m_wait_final_sync_extent;
752 bool m_lcp_lsn_synced;
753
754 Uint32 m_num_sync_pages_waiting;
755 Uint32 m_num_sync_extent_pages_written;
756 /* Data for delete LCP file process */
757 Uint32 deleteFilePtr;
758 Uint32 currentDeleteLcpFile;
759 bool m_delete_data_file_ongoing;
760
761 Uint32 backupId; /* LCP id for LCPs, backupId for backups */
762 Uint32 localLcpId; /* Local LCP id for local LCP (0 otherwise) */
763
764 /**
765 * Error code for backup and for execution phase of LCP.
766 */
767 Uint32 errorCode;
768 /**
769 * List of tables for backups, used during LCP execution phase, for
770 * LCP it only contains one table, so can always be fetched using
771 * the first call.
772 */
773 Table_fifo tables;
774
775 /**
776 * The below set of variables are only used by backups.
777 */
778 Uint32 signalNo;
779
780 Uint32 clientRef;
781 Uint32 clientData;
782 Uint32 senderRef;
783 Uint32 senderData;
784 Uint32 flags;
785 Uint32 backupKey[2];
786 Uint32 masterRef;
787 NdbNodeBitmask nodes;
788
789 Bitmask<(Uint32)(MAX_NDBMT_LQH_THREADS/sizeof(Uint32))> fragWorkers[MAX_NDB_NODES];
790 Uint32 idleFragWorkerCount;
791
792 /**
793 * Statistical variables for LCP and Backup, initialised when
794 * starting LCP and incremented during LCP execution phase.
795 */
796 Uint64 noOfBytes;
797 Uint64 noOfRecords;
798 /* m_bytes_written is used for scheduling of LCP and Backups */
799 Uint64 m_bytes_written;
800
801 /**
802 * Statistical variables for backups.
803 */
804 Uint64 noOfLogBytes;
805 Uint64 noOfLogRecords;
806
807 /**
808 * Backup variables
809 */
810 Uint32 startGCP;
811 Uint32 currGCP;
812 Uint32 stopGCP;
813 TriggerRecord_list triggers;
814
815
816 /**
817 * The list of file records used in this block by both
818 * backups and LCPs.
819 */
820 BackupFile_list files;
821 Uint32 ctlFilePtr; // Ptr.i to ctl-file (LCP and Backup)
822 Uint32 logFilePtr; // Ptr.i to log-file (Only backup)
823 Uint32 dataFilePtr[BackupFormat::NDB_MAX_FILES_PER_LCP];
824 // Ptr.i to first data-file (LCP and Backup)
825 Uint32 prepareDataFilePtr[BackupFormat::NDB_MAX_FILES_PER_LCP]; // Only LCP
826 Uint32 prepareCtlFilePtr[2]; // Ptr.i to ctl-file for LCP prepare
827
828 Uint32 backupDataLen; // Used for (un)packing backup request
829 SimpleProperties props;// Used for (un)packing backup request
830
831 NDB_TICKS m_start_sync_op;
832 NDB_TICKS m_high_res_lcp_start_time;
833
834 struct SlaveData {
835 SignalCounter trigSendCounter;
836 Uint32 gsn;
837 struct {
838 Uint32 tableId;
839 } createTrig;
840 struct {
841 Uint32 tableId;
842 } dropTrig;
843 } slaveData;
844
845 struct MasterData {
MasterDataBackup::BackupRecord::MasterData846 MasterData(Backup & b)
847 {
848 }
849 MutexHandle2<BACKUP_DEFINE_MUTEX> m_defineBackupMutex;
850 MutexHandle2<DICT_COMMIT_TABLE_MUTEX> m_dictCommitTableMutex;
851
852 Uint32 gsn;
853 SignalCounter sendCounter;
854 Uint32 errorCode;
855 union {
856 struct {
857 Uint32 retriesLeft;
858 } sequence;
859 struct {
860 Uint32 startBackup;
861 } waitGCP;
862 struct {
863 Uint32 signalNo;
864 Uint32 noOfSignals;
865 Uint32 tablePtr;
866 } startBackup;
867 struct {
868 Uint32 dummy;
869 } stopBackup;
870 };
871 } masterData;
872
873 Uint32 nextList;
874 union { Uint32 prevList; Uint32 nextPool; };
875
setErrorCodeBackup::BackupRecord876 void setErrorCode(Uint32 errCode){
877 if(errorCode == 0)
878 errorCode = errCode;
879 }
880
setPrepareErrorCodeBackup::BackupRecord881 void setPrepareErrorCode(Uint32 errCode){
882 if(prepareErrorCode == 0)
883 prepareErrorCode = errCode;
884 }
885
checkErrorBackup::BackupRecord886 bool checkError() const {
887 return errorCode != 0;
888 }
889
checkPrepareErrorBackup::BackupRecord890 bool checkPrepareError() const {
891 return prepareErrorCode != 0;
892 }
893
is_lcpBackup::BackupRecord894 bool is_lcp() const {
895 return backupDataLen == ~(Uint32)0;
896 }
897
898 Backup & backup;
numberBackup::BackupRecord899 BlockNumber number() const { return backup.number(); }
jamBufferBackup::BackupRecord900 EmulatedJamBuffer *jamBuffer() const { return backup.jamBuffer(); }
progErrorBackup::BackupRecord901 [[noreturn]] void progError(int line,
902 int cause,
903 const char * extra,
904 const char * check)
905 {
906 backup.progError(line, cause, extra, check);
907 }
908 };
909 friend struct BackupRecord;
910 typedef Ptr<BackupRecord> BackupRecordPtr;
911 typedef ArrayPool<BackupRecord> BackupRecord_pool;
912 typedef SLList<BackupRecord_pool> BackupRecord_sllist;
913 typedef DLList<BackupRecord_pool> BackupRecord_dllist;
914
915 /**
916 * Number of words needed in buff to start a new scan batch
917 * (Which can directly write a number of rows of max size
918 * into the buffer)
919 */
920 #define BACKUP_MIN_BUFF_WORDS (ZRESERVED_SCAN_BATCH_SIZE * \
921 (MAX_TUPLE_SIZE_IN_WORDS + \
922 MAX_ATTRIBUTES_IN_TABLE + \
923 128))
924
925 #define MAX_BUFFER_USED_WITHOUT_REDO_ALERT (512 * 1024)
926 #define BACKUP_DEFAULT_WRITE_SIZE (256 * 1024)
927 #define BACKUP_DEFAULT_BUFFER_SIZE (2 * 1024 * 1024)
928 #define BACKUP_DEFAULT_LOGBUFFER_SIZE (16 * 1024 * 1024)
929
930 struct Config {
931 Uint32 m_dataBufferSize;
932 Uint32 m_logBufferSize;
933 Uint32 m_minWriteSize;
934 Uint32 m_maxWriteSize;
935 Uint32 m_lcp_buffer_size;
936
937 Uint64 m_disk_write_speed_min;
938 Uint64 m_disk_write_speed_max;
939 Uint64 m_disk_write_speed_max_other_node_restart;
940 Uint64 m_disk_write_speed_max_own_restart;
941 Uint32 m_backup_disk_write_pct;
942 Uint32 m_disk_synch_size;
943 Uint32 m_diskless;
944 Uint32 m_o_direct;
945 Uint32 m_compressed_backup;
946 Uint32 m_compressed_lcp;
947 };
948
949 /**
950 * Variables
951 */
952 Uint32 * c_startOfPages;
953 /**
954 * Map from tableId to tabPtr.i to speed up findTable
955 * If the same table is mapped to several backups we will
956 * look for the table with the correct backupPtr.
957 */
958 Uint32 * c_tableMap;
959 NodeId c_masterNodeId;
960 Node_list c_nodes;
961 NdbNodeBitmask c_aliveNodes;
962 BackupRecord_dllist c_backups;
963 Config c_defaults;
964
965 /*
966 Variables that control checkpoint to disk speed
967 */
968 bool m_is_lcp_running;
969 bool m_is_backup_running;
970 bool m_is_any_node_restarting;
971 bool m_node_restart_check_sent;
972 bool m_our_node_started;
973 Uint64 m_curr_disk_write_speed;
974 Uint64 m_curr_backup_disk_write_speed;
975 Uint64 m_words_written_this_period;
976 Uint64 m_backup_words_written_this_period;
977 Uint64 m_overflow_disk_write;
978 Uint64 m_backup_overflow_disk_write;
979 Uint32 m_reset_delay_used;
980 NDB_TICKS m_reset_disk_speed_time;
981
982 //#ifdef VM_TRACE
983 Uint64 m_debug_redo_log_count;
984 //#endif
985
986 /* Keep track of disk data usage in checkpoints */
987 Uint64 m_current_dd_time_us;
988 Uint32 m_last_lcp_dd_percentage;
989 Uint32 m_undo_log_level_percentage;
990 Uint32 m_max_undo_log_level_percentage;
991
992 RedoStateRep::RedoAlertState m_redo_alert_state;
993 RedoStateRep::RedoAlertState m_local_redo_alert_state;
994 RedoStateRep::RedoAlertState m_global_redo_alert_state;
995 Uint32 m_redo_alert_factor;
996 BackupRecordPtr m_lcp_ptr;
997
998 NDB_TICKS m_lcp_start_time;
999 NDB_TICKS m_prev_lcp_start_time;
1000 NDB_TICKS m_lcp_current_cut_point;
1001 Uint64 m_last_redo_used_in_bytes;
1002 Uint64 m_last_lcp_exec_time_in_ms;
1003 Uint64 m_max_redo_speed_per_sec;
1004 Uint64 m_update_size_lcp[2];
1005 Uint64 m_update_size_lcp_last;
1006 Uint64 m_insert_size_lcp[2];
1007 Uint64 m_insert_size_lcp_last;
1008 Uint64 m_delete_size_lcp[2];
1009 Uint64 m_delete_size_lcp_last;
1010 Uint64 m_proposed_disk_write_speed;
1011 Uint64 m_lcp_change_rate;
1012 Uint64 m_lcp_timing_factor;
1013 Int64 m_lcp_lag[2];
1014 Uint32 m_lcp_timing_counter;
1015 Uint32 m_redo_percentage;
1016 Uint32 m_max_redo_percentage;
1017 bool m_first_lcp_started;
1018
1019 void init_lcp_timers(Uint64);
1020 void calculate_seconds_since_lcp_cut(Uint64& seconds_since_lcp_cut);
1021 Uint64 init_change_size(Uint64 update_size,
1022 Uint64 insert_size,
1023 Uint64 delete_size,
1024 Uint64 total_memory);
1025 Uint64 modify_change_size(Uint64 update_size,
1026 Uint64 insert_size,
1027 Uint64 delete_size,
1028 Uint64 total_size,
1029 Uint64 change_size);
1030 Uint32 calculate_parts(Uint64 total_size,
1031 Uint64 total_memory);
1032 Uint64 calculate_change_rate(Uint64 change_size,
1033 Uint64& seconds_since_lcp_cut);
1034 Uint64 calculate_checkpoint_rate(Uint64 update_size,
1035 Uint64 insert_size,
1036 Uint64 delete_size,
1037 Uint64 total_memory,
1038 Uint64& seconds_since_lcp_cut);
1039 void calculate_redo_parameters(Uint64 redo_usage,
1040 Uint64 redo_size,
1041 Uint64 redo_written_since_last_call,
1042 Uint64 millis_since_last_call,
1043 Uint64& redo_percentage,
1044 Uint64& max_redo_used_before_cut,
1045 Uint64& mean_redo_used_before_cut,
1046 Uint64& mean_redo_speed_per_sec,
1047 Uint64& current_redo_speed_per_sec,
1048 Uint64& redo_available);
1049 void change_alert_state_redo_percent(Uint64 redo_percentage);
1050 void change_alert_state_redo_usage(Uint64 max_redo_used_before_cut,
1051 Uint64 mean_redo_used_before_cut,
1052 Uint64 redo_available);
1053 void handle_global_alert_state(Signal *signal,
1054 RedoStateRep::RedoAlertState save_redo_alert_state);
1055 void set_redo_alert_factor(Uint64 redo_percentage);
1056 void set_lcp_timing_factors(Uint64 seconds_since_lcp_cut);
1057 void reset_lcp_timing_factors();
1058 void set_proposed_disk_write_speed(Uint64 current_redo_speed_per_sec,
1059 Uint64 mean_redo_speed_per_sec,
1060 Uint64 seconds_since_lcp_cut);
1061 void measure_change_speed(Signal*, Uint64 millis_since_last_call);
1062 void debug_report_redo_control(Uint32);
1063 void lcp_start_point(Signal*);
1064 void lcp_end_point();
1065 Uint64 calculate_proposed_disk_write_speed();
1066
1067 Uint32 m_curr_lcp_id;
1068
1069 /**
1070 * We check the use of disk write speed limits every 100 milliseconds. The
1071 * speed check parameters are also in words, so to get the current speed
1072 * in bytes per second we need to multiply by 40 (10 check periods per
1073 * second times 4 bytes per word).
1074 static const int DISK_SPEED_CHECK_DELAY = 100;
1075 static const int CURR_DISK_SPEED_CONVERSION_FACTOR_TO_SECONDS = 40;
1076
1077 Uint64 m_monitor_words_written;
1078 Uint64 m_backup_monitor_words_written;
1079 Uint32 m_periods_passed_in_monitor_period;
1080 NDB_TICKS m_monitor_snapshot_start;
1081
1082 /**
1083 * A number of statistical variables that keep track of
1084 * various events and how often they happen.
1085 */
1086 Uint64 slowdowns_due_to_io_lag;
1087 Uint64 slowdowns_due_to_high_cpu;
1088 Uint64 slowdown_backups_due_to_high_cpu;
1089 Uint64 disk_write_speed_set_to_min;
1090 Uint64 backup_disk_write_speed_set_to_min;
1091
1092 /**
1093 * Variables used to keep stats on disk write speeds for
1094 * reporting in checkpoint_speed ndbinfo table.
1095 * We keep the last 60 seconds of stats and use this to
1096 * calculate various aggregates reported in the ndbinfo
1097 * table.
1098 *
 * The idea is that next_disk_write_speed_report specifies
 * the next entry to fill in with a speed report, while
 * last_disk_write_speed_report points to the oldest entry
 * that we have written so far. The first report is written
 * into index 0, so in the beginning
 * last_disk_write_speed_report is equal to 0 and
 * next_disk_write_speed_report points to the next entry to
 * write into. When we write into the last entry (index = 60)
 * we have written into all entries and we move
 * last_disk_write_speed_report forward. After that it will
 * always be one entry ahead of next_disk_write_speed_report.
 * Since this means that the next entry to write isn't
 * available for reporting (although it hasn't been
 * overwritten yet), we have 61 entries in the array to
 * cover 60 seconds of time.
1112 */
1113 #define DISK_WRITE_SPEED_REPORT_SIZE 61
1114
1115 #define MILLIS_IN_A_SECOND 1000
1116 #define MILLIS_ADJUST_FOR_EARLY_REPORT 20
1117 struct DiskWriteSpeedReport
1118 {
1119 Uint64 backup_lcp_bytes_written;
1120 Uint64 backup_bytes_written;
1121 Uint64 redo_bytes_written;
1122 Uint64 target_disk_write_speed;
1123 Uint64 target_backup_disk_write_speed;
1124 Uint64 millis_passed;
1125 };
1126 DiskWriteSpeedReport disk_write_speed_rep[DISK_WRITE_SPEED_REPORT_SIZE];
1127 Uint32 last_disk_write_speed_report;
1128 Uint32 next_disk_write_speed_report;
1129
1130 /**
1131 * Methods used in control of checkpoint speed
1132 */
1133 void handle_overflow(Uint64& overflow_disk_write,
1134 Uint64& words_written_this_period,
1135 Uint64& curr_disk_write_speed);
1136 void calculate_next_delay(const NDB_TICKS curr_time);
1137 void monitor_disk_write_speed(const NDB_TICKS curr_time,
1138 const Uint64 millisPassed);
1139 void calculate_current_speed_bounds(Uint64& max_speed,
1140 Uint64& max_backup_speed,
1141 Uint64& min_speed);
1142 void adjust_disk_write_speed_down(Uint64& curr_disk_write_speed,
1143 Uint64& disk_speed_set_to_min,
1144 Uint64 min_speed,
1145 int adjust_speed);
1146 void adjust_disk_write_speed_up(Uint64& curr_disk_write_speed,
1147 Uint64 max_speed,
1148 int adjust_speed);
1149 void calculate_disk_write_speed(Signal *signal);
1150 void send_next_reset_disk_speed_counter(Signal *signal);
1151
1152 void restore_disk_write_speed_numbers(void);
1153 void calculate_real_disk_write_speed_parameters(void);
1154 Uint64 get_new_speed_val32(Signal *signal);
1155 Uint64 get_new_speed_val64(Signal *signal);
1156
1157 /**
1158 * Methods used in ndbinfo reporting of checkpoint speed.
1159 */
1160 void report_disk_write_speed_report(Uint64 bytes_written_this_period,
1161 Uint64 backup_bytes_written_this_period,
1162 Uint64 millis_passed);
1163 Uint32 get_disk_write_speed_record(Uint32 start_index);
1164 Uint64 calculate_millis_since_finished(Uint32 start_index);
1165 void calculate_disk_write_speed_seconds_back(Uint32 seconds_back,
1166 Uint64 & millis_passed,
1167 Uint64 & backup_lcp_bytes_written,
1168 Uint64 & backup_bytes_written,
1169 Uint64 & redo_bytes_written,
1170 bool at_least_one = false);
1171 void calculate_std_disk_write_speed_seconds_back(Uint32 seconds_back,
1172 Uint64 millis_passed_total,
1173 Uint64 backup_lcp_bytes_written_total,
1174 Uint64 backup_bytes_written_total,
1175 Uint64 redo_bytes_written_total,
1176 Uint64 & std_dev_backup_lcp_in_bytes_per_sec,
1177 Uint64 & std_dev_backup_in_bytes_per_sec,
1178 Uint64 & std_dev_redo_in_bytes_per_sec);
1179
1180
1181 STATIC_CONST(NO_OF_PAGES_META_FILE =
1182 (2*MAX_WORDS_META_FILE + BACKUP_WORDS_PER_PAGE - 1) /
1183 BACKUP_WORDS_PER_PAGE);
1184
1185 Uint32 m_backup_report_frequency;
1186
1187 bool c_initial_start_lcp_not_done_yet;
1188
1189 unsigned char c_part_array[BackupFormat::NDB_MAX_LCP_PARTS * 4];
1190 /**
1191 * Pools
1192 */
1193 Table_pool c_tablePool;
1194 BackupRecord_pool c_backupPool;
1195 BackupFile_pool c_backupFilePool;
1196 Page32_pool c_pagePool;
1197 Fragment_pool c_fragmentPool;
1198 Node_pool c_nodePool;
1199 TriggerRecord_pool c_triggerPool;
1200 ArrayPool<DeleteLcpFile> c_deleteLcpFilePool;
1201
1202 void checkFile(Signal*, BackupFilePtr);
1203 void checkScan(Signal*, BackupRecordPtr, BackupFilePtr, bool);
1204 bool check_new_scan(BackupRecordPtr ptr, OperationRecord &op, bool);
1205 bool check_min_buf_size(BackupRecordPtr ptr, OperationRecord &op);
1206 bool check_frag_complete(BackupRecordPtr ptr, BackupFilePtr filePtr);
1207 bool check_error(BackupRecordPtr ptr, BackupFilePtr filePtr);
1208 void fragmentCompleted(Signal*, BackupFilePtr, Uint32 errCode = 0);
1209
1210 void backupAllData(Signal* signal, BackupRecordPtr);
1211
1212 void getFragmentInfo(Signal*, BackupRecordPtr, TablePtr, Uint32 fragNo);
1213 void getFragmentInfoDone(Signal*, BackupRecordPtr);
1214
1215 void openFiles(Signal* signal, BackupRecordPtr ptr);
1216 void openFilesReply(Signal*, BackupRecordPtr ptr, BackupFilePtr);
1217 void openFilesReplyLCP(Signal*, BackupRecordPtr ptr, BackupFilePtr);
1218 void closeFiles(Signal*, BackupRecordPtr ptr);
1219 void closeFile(Signal*,
1220 BackupRecordPtr,
1221 BackupFilePtr,
1222 bool prepare_phase = false,
1223 bool remove_flag = false);
1224 void closeFilesDone(Signal*, BackupRecordPtr ptr);
1225
1226 void init_file(BackupFilePtr, Uint32);
1227
1228 void sendDefineBackupReq(Signal *signal, BackupRecordPtr ptr);
1229
1230 void defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId);
1231 void createTrigReply(Signal* signal, BackupRecordPtr ptr);
1232 void alterTrigReply(Signal* signal, BackupRecordPtr ptr);
1233 void startBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32);
1234 void stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId);
1235
1236 void defineBackupRef(Signal*, BackupRecordPtr, Uint32 errCode = 0);
1237 void backupFragmentRef(Signal * signal, BackupFilePtr filePtr);
1238
1239 void nextFragment(Signal*, BackupRecordPtr);
1240 void release_tables(BackupRecordPtr);
1241
1242 void sendCreateTrig(Signal*, BackupRecordPtr ptr, TablePtr tabPtr);
1243 void createAttributeMask(TablePtr tab, Bitmask<MAXNROFATTRIBUTESINWORDS>&);
1244 void sendStartBackup(Signal*, BackupRecordPtr, TablePtr);
1245 void sendAlterTrig(Signal*, BackupRecordPtr ptr);
1246
1247 void sendScanFragReq(Signal*,
1248 BackupRecordPtr,
1249 BackupFilePtr,
1250 TablePtr,
1251 FragmentPtr,
1252 Uint32 delay);
1253
1254 void init_scan_prio_level(Signal *signal, BackupRecordPtr ptr);
1255 bool check_scan_if_raise_prio(Signal *signal, BackupRecordPtr ptr);
1256
1257 void sendDropTrig(Signal*, BackupRecordPtr ptr);
1258 void sendDropTrig(Signal* signal, BackupRecordPtr ptr, TablePtr tabPtr);
1259 void dropTrigReply(Signal*, BackupRecordPtr ptr);
1260
1261 void sendSignalAllWait(BackupRecordPtr ptr, Uint32 gsn, Signal *signal,
1262 Uint32 signalLength,
1263 bool executeDirect = false);
1264 bool haveAllSignals(BackupRecordPtr ptr, Uint32 gsn, Uint32 nodeId);
1265
1266 void sendStopBackup(Signal*, BackupRecordPtr ptr);
1267 void sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 errCode);
1268 void sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr,
1269 Uint32 errCode);
1270 void masterAbort(Signal*, BackupRecordPtr ptr);
1271 void masterSendAbortBackup(Signal*, BackupRecordPtr ptr);
1272 void slaveAbort(Signal*, BackupRecordPtr ptr);
1273
1274 void abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr);
1275 void abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanDone);
1276
1277 bool verifyNodesAlive(BackupRecordPtr, const NdbNodeBitmask& aNodeBitMask);
1278 bool checkAbort(BackupRecordPtr ptr);
1279 void checkNodeFail(Signal* signal,
1280 BackupRecordPtr ptr,
1281 NodeId newCoord,
1282 Uint32 theFailedNodes[NdbNodeBitmask::Size]);
1283 void masterTakeOver(Signal* signal, BackupRecordPtr ptr);
1284
1285
getMasterNodeId() const1286 NodeId getMasterNodeId() const { return c_masterNodeId; }
1287 bool findTable(const BackupRecordPtr &, TablePtr &, Uint32 tableId);
1288 void insertTableMap(TablePtr &, Uint32 backupPtrI, Uint32 tableId);
1289 void removeTableMap(TablePtr &, Uint32 backupPtrI, Uint32 tableId);
1290 bool parseTableDescription(Signal*, BackupRecordPtr ptr, TablePtr, const Uint32*, Uint32);
1291
1292 bool insertFileHeader(BackupFormat::FileType, BackupRecord*, BackupFile*);
1293 void sendBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errorCode);
1294 void sendBackupRef(BlockReference ref, Uint32 flags, Signal *signal,
1295 Uint32 senderData, Uint32 errorCode);
1296 void dumpUsedResources();
1297 void cleanup(Signal*, BackupRecordPtr ptr);
1298 void abort_scan(Signal*, BackupRecordPtr ptr);
1299 void removeBackup(Signal*, BackupRecordPtr ptr);
1300
1301 void sendUtilSequenceReq(Signal*, BackupRecordPtr ptr, Uint32 delay = 0);
1302
1303 /*
1304 For periodic backup status reporting and explicit backup status reporting
1305 */
1306 /* Init at start of backup, timers etc... */
1307 void initReportStatus(Signal* signal, BackupRecordPtr ptr);
/* Check timers for reporting at certain points */
1309 void checkReportStatus(Signal* signal, BackupRecordPtr ptr);
1310 /* Send backup status, invoked either periodically, or explicitly */
1311 void reportStatus(Signal* signal, BackupRecordPtr ptr,
1312 BlockReference ref = CMVMI_REF);
1313
1314 void sendSTTORRY(Signal*);
1315 void createSequence(Signal* signal);
1316 void createSequenceReply(Signal*, class UtilSequenceConf *);
1317
1318 void lcp_close_ctl_file_drop_case(Signal*, BackupRecordPtr);
1319 void finish_end_lcp(Signal*, BackupRecordPtr);
1320 bool check_if_in_page_range(Uint32 part_id,
1321 Uint32 start_part,
1322 Uint32 num_parts);
1323 void lcp_read_ctl_file(Page32Ptr, Uint32, BackupRecordPtr);
1324 void lcp_write_ctl_file(Signal*, BackupRecordPtr);
1325 void lcp_write_ctl_file_to_disk(Signal*, BackupFilePtr, Page32Ptr);
1326 void lcp_init_ctl_file(Page32Ptr pagePtr);
1327 Uint32 compress_part_pairs(struct BackupFormat::LCPCtlFile*,
1328 Uint32 numPartPairs,
1329 Uint32 file_size);
1330 Uint32 decompress_part_pairs(struct BackupFormat::LCPCtlFile*,
1331 Uint32,
1332 struct BackupFormat::PartPair*);
1333 bool convert_ctl_page_to_host(struct BackupFormat::LCPCtlFile*);
1334 void convert_ctl_page_to_network(Uint32*, Uint32 file_size);
1335 void handle_idle_lcp(Signal*, BackupRecordPtr);
1336 Uint64 get_total_memory();
1337 Uint64 calculate_row_change_count(BackupRecordPtr);
1338 Uint32 calculate_min_parts(Uint64 row_count,
1339 Uint64 row_change_count,
1340 Uint64 mem_used,
1341 Uint64 total_mem);
1342 void calculate_number_of_parts(BackupRecordPtr);
1343 void copy_lcp_info_from_prepare(BackupRecordPtr);
1344 void copy_prev_lcp_info(BackupRecordPtr,
1345 struct BackupFormat::LCPCtlFile *lcpCtlFilePtr);
1346 void start_lcp_scan(Signal *signal,
1347 BackupRecordPtr ptr,
1348 TablePtr tabPtr,
1349 Uint32 ptrI,
1350 Uint32 fragNo);
1351 Uint32 get_part_add(Uint32 start_part, Uint32 num_parts);
1352 Uint32 get_file_add(Uint32 start_file, Uint32 num_files);
1353 Uint32 get_file_sub(Uint32 start_file, Uint32 num_files);
1354
1355
1356 void prepare_ranges_for_parts(BackupRecordPtr, Uint32 parts);
1357 void prepare_parts_for_lcp(Signal*, BackupRecordPtr);
1358 void prepare_new_part_info(BackupRecordPtr, Uint32);
1359 void lcp_swap_tables(BackupRecordPtr, TablePtr&, Uint32);
1360 void lcp_swap_data_file(BackupRecordPtr);
1361 void lcp_swap_ctl_file(BackupRecordPtr);
1362 void lcp_set_lcp_id(BackupRecordPtr,
1363 struct BackupFormat::LCPCtlFile*);
1364 void lcp_copy_ctl_page(BackupRecordPtr);
1365 void lcp_update_ctl_page(BackupRecordPtr,
1366 Page32Ptr&,
1367 BackupFilePtr&);
1368 Uint32 lcp_pre_sync_lsn(BackupRecordPtr);
1369 void start_execute_lcp(Signal* signal,
1370 BackupRecordPtr ptr,
1371 TablePtr & tabPtr,
1372 Uint32 tableId);
1373 void lcp_open_ctl_file(Signal*, BackupRecordPtr, Uint32 lcpNo);
1374 void lcp_open_ctl_file_done(Signal*, BackupRecordPtr, BackupFilePtr);
1375 void lcp_read_ctl_file_done(Signal* signal, BackupRecordPtr);
1376 void lcp_close_ctl_file(Signal*, BackupRecordPtr, Uint32 closeLcpNo);
1377 void lcp_close_prepare_ctl_file_done(Signal*, BackupRecordPtr);
1378 void lcp_read_ctl_page(BackupFilePtr, Page32Ptr&);
1379 void lcp_open_data_file(Signal*, BackupRecordPtr);
1380 void lcp_open_data_file_late(Signal*, BackupRecordPtr, Uint32 index);
1381 void lcp_open_data_file_done(Signal*, BackupRecordPtr);
1382 void lcp_close_data_file(Signal*, BackupRecordPtr, bool remove_flag);
1383 void lcp_close_data_file_conf(Signal* signal, BackupRecordPtr);
1384 void read_lcp_descriptor(Signal*, BackupRecordPtr, TablePtr);
1385 void lcp_start_complete_processing(Signal *signal, BackupRecordPtr ptr);
1386 void sync_page_cache_callback(Signal*, Uint32 ptrI, Uint32 res);
1387 void finalize_lcp_processing(Signal*, BackupRecordPtr);
1388 void lcp_one_part_completed(Signal*, BackupRecordPtr);
1389 void lcp_write_undo_log(Signal *signal, BackupRecordPtr);
1390
1391 void check_wait_end_lcp(Signal*, BackupRecordPtr ptr);
1392 void check_empty_queue_waiters(Signal*, BackupRecordPtr ptr);
1393 void delete_lcp_file_processing(Signal*);
1394 void finished_removing_files(Signal*, BackupRecordPtr);
1395 void sendEND_LCPCONF(Signal*, BackupRecordPtr);
1396 void send_firstSYNC_EXTENT_PAGES_REQ(Signal*, BackupRecordPtr);
1397 void sendINFORM_BACKUP_DROP_TAB_CONF(Signal*, BackupRecordPtr);
1398
1399 void sync_log_lcp_lsn(Signal*, DeleteLcpFilePtr, Uint32 ptrI);
1400 void sync_log_lcp_lsn_callback(Signal*, Uint32 ptrI, Uint32 res);
1401 void lcp_open_ctl_file_for_rewrite(Signal*,
1402 DeleteLcpFilePtr,
1403 BackupRecordPtr);
1404 void lcp_open_ctl_file_for_rewrite_done(Signal*, BackupFilePtr);
1405 void lcp_read_ctl_file_for_rewrite(Signal*, BackupFilePtr);
1406 void lcp_read_ctl_file_for_rewrite_done(Signal*, BackupFilePtr);
1407 void lcp_update_ctl_file_for_rewrite(Signal*, BackupFilePtr, Page32Ptr);
1408 void lcp_update_ctl_file_for_rewrite_done(Signal*,
1409 BackupRecordPtr,
1410 BackupFilePtr);
1411 void lcp_close_ctl_file_for_rewrite(Signal*, BackupRecordPtr, BackupFilePtr);
1412 void lcp_close_ctl_file_for_rewrite_done(Signal*,
1413 BackupRecordPtr,
1414 BackupFilePtr);
1415
1416 void lcp_remove_file(Signal*,
1417 BackupRecordPtr,
1418 DeleteLcpFilePtr);
1419 void lcp_remove_file_conf(Signal*, BackupRecordPtr ptr);
1420
1421 bool ready_to_write(bool ready,
1422 Uint32 sz,
1423 bool eof,
1424 BackupFile *fileP,
1425 BackupRecord* ptrP);
1426
1427 void afterGetTabinfoLockTab(Signal *signal,
1428 BackupRecordPtr ptr, TablePtr tabPtr);
1429 void cleanupNextTable(Signal *signal, BackupRecordPtr ptr, TablePtr tabPtr);
1430
1431 BackupFormat::LogFile::LogEntry* get_log_buffer(Signal*,TriggerPtr, Uint32);
1432
1433 /*
1434 * MT LQH. LCP runs separately in each instance number.
1435 * BACKUP uses instance key 1 (real instance 0 or 1) as master.
1436 */
1437 STATIC_CONST( NdbdInstanceKey = 0 );
1438 STATIC_CONST( BackupProxyInstanceKey = 0 );
1439 STATIC_CONST( UserBackupInstanceKey = 1 );
1440 /*
1441 * instanceKey() is used for routing backup control signals and has 3
1442 * use cases:
1443 * - LCP: return own instance ID, i.e route signal to self
1444 * - multi-threaded backup: return instance of BackupProxy, which
1445 forwards signal to all instances, i.e. route signal to all instances
1446 * - single-threaded backup: return instance 1, i.e. route signal to LDM1
1447 */
instanceKey(BackupRecordPtr ptr)1448 Uint32 instanceKey(BackupRecordPtr ptr) {
1449 return ptr.p->is_lcp() ?
1450 instance() : (ptr.p->flags & BackupReq::MT_BACKUP) ?
1451 BackupProxyInstanceKey : UserBackupInstanceKey;
1452 }
1453
1454 /* map a fragment to an LDM
1455 * single-threaded backup: assign fragment to LDM1
1456 * multithreaded backup: assign fragment to LDM which owns it
1457 */
mapFragToLdm(BackupRecordPtr ptr,Uint32 ownerNode,Uint32 ownerLdm)1458 Uint32 mapFragToLdm(BackupRecordPtr ptr, Uint32 ownerNode, Uint32 ownerLdm)
1459 {
1460 // instance key is 1..n and may be larger than actual number of ldms.
1461 // To ensure we only schedule one fragment per actual ldm at a time, we
1462 // use node information to determine actual ldm which will process request.
1463 int lqh_workers = getNodeInfo(ownerNode).m_lqh_workers;
1464 // adjust values which would be 0 in ndbd
1465 lqh_workers += (lqh_workers == 0);
1466 ownerLdm += (ownerLdm == 0);
1467 // calculate instance key
1468 Uint32 key = 1 + ((ownerLdm - 1) % lqh_workers);
1469 return (ptr.p->flags & BackupReq::MT_BACKUP) ?
1470 key : UserBackupInstanceKey;
1471 }
1472
is_backup_worker()1473 bool is_backup_worker()
1474 {
1475 return isNdbMtLqh() ? (instance() == UserBackupInstanceKey) : true;
1476 }
1477 /*
1478 * Select master instance on any node: LDM1 for ndbmtd, LDM0 for ndbd
1479 * Used in node-failure aborts when a participant node is promoted to master
1480 */
masterInstanceKey(BackupRecordPtr ptr)1481 Uint32 masterInstanceKey(BackupRecordPtr ptr) {
1482 return isNdbMtLqh() ?
1483 UserBackupInstanceKey : NdbdInstanceKey;
1484 }
1485
1486
/**
 * Ugly shared state that allows different worker instances
 * to detect that a backup is in progress even though they
 * are not participating in it.
 * Modified by the instance performing the backup.
 */
1493 static bool g_is_single_thr_backup_running;
1494
1495 void get_page_info(BackupRecordPtr,
1496 Uint32 part_id,
1497 Uint32 & scanGCI,
1498 bool & changed_row_page_flag);
1499 void set_working_file(BackupRecordPtr,
1500 Uint32 part_id,
1501 bool is_all_rows_page);
1502 void init_file_for_lcp(Signal*, Uint32 index, BackupRecordPtr, Uint32 ptrI);
1503 bool is_all_rows_page(BackupRecordPtr, Uint32 part_id);
1504 void change_current_page_temp(Uint32 page_no);
1505 void restore_current_page(BackupRecordPtr ptr);
1506
1507 void setRestorableGci(Uint32);
1508 Uint32 getRestorableGci();
1509
1510 void set_undo_log_level(Uint32 percentage_used);
1511
1512 bool check_pause_lcp_backup(BackupRecordPtr ptr,
1513 bool is_lcp,
1514 bool is_send_scan_next_req);
1515 bool check_pause_lcp_backup(BackupRecordPtr ptr);
1516 bool check_pause_lcp();
1517 void update_pause_lcp_counter(Uint32 loop_count);
1518 void pausing_lcp(Uint32 place, Uint32 val);
1519 void get_lcp_record(BackupRecordPtr &ptr);
1520 bool get_backup_record(BackupRecordPtr &ptr);
1521 public:
1522 bool is_change_part_state(Uint32 page_id);
1523 Uint32 get_max_words_per_scan_batch(Uint32, Uint32&, Uint32, Uint32);
1524 };
1525
1526 inline
1527 Uint32
getRestorableGci()1528 Backup::getRestorableGci()
1529 {
1530 return m_newestRestorableGci;
1531 }
1532
1533 inline
1534 void
set_scanned_pages(Uint32 num_pages_scanned)1535 Backup::OperationRecord::set_scanned_pages(Uint32 num_pages_scanned)
1536 {
1537 lcpScannedPages = num_pages_scanned;
1538 }
1539
1540 inline
1541 void
newRecord(Uint32 * p)1542 Backup::OperationRecord::newRecord(Uint32 * p)
1543 {
1544 dst = p;
1545 scanStop = p;
1546 }
1547
1548 inline
1549 void
finished(Uint32 len)1550 Backup::OperationRecord::finished(Uint32 len)
1551 {
1552 opLen += len;
1553 opNoDone++;
1554 noOfRecords++;
1555 }
1556
1557
1558 #define ZMAX_WORDS_PER_SCAN_BATCH_LOW_PRIO 1600
1559 #define ZMAX_WORDS_PER_SCAN_BATCH_HIGH_PRIO 8000
1560 inline
1561 bool
check_pause_lcp()1562 Backup::check_pause_lcp()
1563 {
1564 return check_pause_lcp_backup(m_lcp_ptr, true, false);
1565 }
1566
1567 inline
1568 bool
check_pause_lcp_backup(BackupRecordPtr ptr)1569 Backup::check_pause_lcp_backup(BackupRecordPtr ptr)
1570 {
1571 return check_pause_lcp_backup(ptr, ptr.p->is_lcp(), true);
1572 }
1573
1574 inline
1575 Uint32
get_max_words_per_scan_batch(Uint32 prioAFlag,Uint32 & wordsWritten,Uint32 is_lcp,Uint32 ptrI)1576 Backup::get_max_words_per_scan_batch(Uint32 prioAFlag,
1577 Uint32 & wordsWritten,
1578 Uint32 is_lcp,
1579 Uint32 ptrI)
1580 {
1581 if (prioAFlag == 0)
1582 return (wordsWritten >= ZMAX_WORDS_PER_SCAN_BATCH_LOW_PRIO);
1583 else
1584 {
1585 bool ret_val;
1586 if (is_lcp)
1587 ret_val = check_pause_lcp();
1588 else
1589 ret_val = (wordsWritten >= ZMAX_WORDS_PER_SCAN_BATCH_HIGH_PRIO);
1590 if (ret_val)
1591 wordsWritten = 0;
1592 return ret_val;
1593 }
1594 }
1595
1596 #undef JAM_FILE_ID
1597
1598 #endif
1599