1 /*
2 Copyright (c) 2003, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <my_global.h>
26 #include "Backup.hpp"
27
28 #include <ndb_version.h>
29
30 #include <NdbTCP.h>
31 #include <Bitmask.hpp>
32
33 #include <signaldata/NodeFailRep.hpp>
34 #include <signaldata/ReadNodesConf.hpp>
35
36 #include <signaldata/DihScanTab.hpp>
37 #include <signaldata/ScanFrag.hpp>
38
39 #include <signaldata/GetTabInfo.hpp>
40 #include <signaldata/DictTabInfo.hpp>
41 #include <signaldata/ListTables.hpp>
42
43 #include <signaldata/FsOpenReq.hpp>
44 #include <signaldata/FsAppendReq.hpp>
45 #include <signaldata/FsCloseReq.hpp>
46 #include <signaldata/FsConf.hpp>
47 #include <signaldata/FsRef.hpp>
48 #include <signaldata/FsRemoveReq.hpp>
49
50 #include <signaldata/BackupImpl.hpp>
51 #include <signaldata/BackupSignalData.hpp>
52 #include <signaldata/BackupContinueB.hpp>
53 #include <signaldata/EventReport.hpp>
54
55 #include <signaldata/UtilSequence.hpp>
56
57 #include <signaldata/CreateTrigImpl.hpp>
58 #include <signaldata/DropTrigImpl.hpp>
59 #include <signaldata/FireTrigOrd.hpp>
60 #include <signaldata/TrigAttrInfo.hpp>
61 #include <AttributeHeader.hpp>
62
63 #include <signaldata/WaitGCP.hpp>
64 #include <signaldata/LCP.hpp>
65 #include <signaldata/BackupLockTab.hpp>
66 #include <signaldata/DumpStateOrd.hpp>
67
68 #include <signaldata/DumpStateOrd.hpp>
69
70 #include <signaldata/DbinfoScan.hpp>
71 #include <signaldata/TransIdAI.hpp>
72
73 #include <NdbTick.h>
74 #include <dbtup/Dbtup.hpp>
75
76 #include <EventLogger.hpp>
77 extern EventLogger * g_eventLogger;
78
79 #include <math.h>
80
81 #define JAM_FILE_ID 475
82
83 static const Uint32 WaitDiskBufferCapacityMillis = 1;
84 static const Uint32 WaitScanTempErrorRetryMillis = 10;
85
86 static NDB_TICKS startTime;
87
88 #ifdef VM_TRACE
89 #define DEBUG_OUT(x) ndbout << x << endl
90 #else
91 #define DEBUG_OUT(x)
92 #endif
93
94 //#define DEBUG_ABORT
95 //#define dbg globalSignalLoggers.log
96
97 static Uint32 g_TypeOfStart = NodeState::ST_ILLEGAL_TYPE;
98
99 #define SEND_BACKUP_STARTED_FLAG(A) (((A) & 0x3) > 0)
100 #define SEND_BACKUP_COMPLETED_FLAG(A) (((A) & 0x3) > 1)
101
/**
 * "Magic" constants used by the adaptive LCP speed algorithm. These magic
 * constants try to ensure a smooth LCP load which is high enough to
 * avoid slowing down LCPs to the point where we run out of REDO log, but
 * also low enough that we do not use so much CPU on LCPs that we block out
 * most user transactions. We also want to avoid destroying real-time
 * characteristics due to LCPs.
 *
 * See the much longer explanation of these values below.
 */
112 #define MAX_LCP_WORDS_PER_BATCH (1500)
113
114 #define HIGH_LOAD_LEVEL 32
115 #define VERY_HIGH_LOAD_LEVEL 48
116 #define NUMBER_OF_SIGNALS_PER_SCAN_BATCH 3
117 #define MAX_RAISE_PRIO_MEMORY 16
118
119 void
execSTTOR(Signal * signal)120 Backup::execSTTOR(Signal* signal)
121 {
122 jamEntry();
123
124 const Uint32 startphase = signal->theData[1];
125 const Uint32 typeOfStart = signal->theData[7];
126
127 if (startphase == 1)
128 {
129 ndbrequire((c_lqh = (Dblqh*)globalData.getBlock(DBLQH, instance())) != 0);
130 m_words_written_this_period = 0;
131 last_disk_write_speed_report = 0;
132 next_disk_write_speed_report = 0;
133 m_monitor_words_written = 0;
134 m_periods_passed_in_monitor_period = 0;
135 m_monitor_snapshot_start = NdbTick_getCurrentTicks();
136 m_curr_disk_write_speed = c_defaults.m_disk_write_speed_max_own_restart;
137 m_overflow_disk_write = 0;
138 slowdowns_due_to_io_lag = 0;
139 slowdowns_due_to_high_cpu = 0;
140 disk_write_speed_set_to_min = 0;
141 m_is_any_node_restarting = false;
142 m_node_restart_check_sent = false;
143 m_our_node_started = false;
144 m_reset_disk_speed_time = NdbTick_getCurrentTicks();
145 m_reset_delay_used = Backup::DISK_SPEED_CHECK_DELAY;
146 signal->theData[0] = BackupContinueB::RESET_DISK_SPEED_COUNTER;
147 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal,
148 Backup::DISK_SPEED_CHECK_DELAY, 1);
149 }
150 if (startphase == 3) {
151 jam();
152 g_TypeOfStart = typeOfStart;
153 signal->theData[0] = reference();
154 sendSignal(NDBCNTR_REF, GSN_READ_NODESREQ, signal, 1, JBB);
155 return;
156 }//if
157
158 if (startphase == 7)
159 {
160 m_monitor_words_written = 0;
161 m_periods_passed_in_monitor_period = 0;
162 m_monitor_snapshot_start = NdbTick_getCurrentTicks();
163 m_curr_disk_write_speed = c_defaults.m_disk_write_speed_min;
164 m_our_node_started = true;
165 }
166
167 if(startphase == 7 && g_TypeOfStart == NodeState::ST_INITIAL_START &&
168 c_masterNodeId == getOwnNodeId() && !isNdbMtLqh()){
169 jam();
170 createSequence(signal);
171 return;
172 }//if
173
174 sendSTTORRY(signal);
175 return;
176 }//Dbdict::execSTTOR()
177
178 void
execREAD_NODESCONF(Signal * signal)179 Backup::execREAD_NODESCONF(Signal* signal)
180 {
181 jamEntry();
182 ReadNodesConf * conf = (ReadNodesConf *)signal->getDataPtr();
183
184 c_aliveNodes.clear();
185
186 Uint32 count = 0;
187 for (Uint32 i = 0; i<MAX_NDB_NODES; i++) {
188 jam();
189 if(NdbNodeBitmask::get(conf->allNodes, i)){
190 jam();
191 count++;
192
193 NodePtr node;
194 ndbrequire(c_nodes.seizeFirst(node));
195
196 node.p->nodeId = i;
197 if(NdbNodeBitmask::get(conf->inactiveNodes, i)) {
198 jam();
199 node.p->alive = 0;
200 } else {
201 jam();
202 node.p->alive = 1;
203 c_aliveNodes.set(i);
204 }//if
205 }//if
206 }//for
207 c_masterNodeId = conf->masterNodeId;
208 ndbrequire(count == conf->noOfNodes);
209 sendSTTORRY(signal);
210 }
211
212 void
sendSTTORRY(Signal * signal)213 Backup::sendSTTORRY(Signal* signal)
214 {
215 signal->theData[0] = 0;
216 signal->theData[3] = 1;
217 signal->theData[4] = 3;
218 signal->theData[5] = 7;
219 signal->theData[6] = 255; // No more start phases from missra
220 BlockReference cntrRef = !isNdbMtLqh() ? NDBCNTR_REF : BACKUP_REF;
221 sendSignal(cntrRef, GSN_STTORRY, signal, 7, JBB);
222 }
223
224 void
createSequence(Signal * signal)225 Backup::createSequence(Signal* signal)
226 {
227 UtilSequenceReq * req = (UtilSequenceReq*)signal->getDataPtrSend();
228
229 req->senderData = RNIL;
230 req->sequenceId = NDB_BACKUP_SEQUENCE;
231 req->requestType = UtilSequenceReq::Create;
232
233 sendSignal(DBUTIL_REF, GSN_UTIL_SEQUENCE_REQ,
234 signal, UtilSequenceReq::SignalLength, JBB);
235 }
236
237 void
handle_overflow(void)238 Backup::handle_overflow(void)
239 {
240 jam();
241 /**
242 * If we overflowed in the last period, count it in
243 * this new period, potentially overflowing again into
244 * future periods...
245 *
246 * The overflow can only come from the last write we did in this
247 * period, but potentially this write is bigger than what we are
248 * allowed to write during one period.
249 *
250 * Calculate the overflow to pass into the new period
251 * (overflowThisPeriod). It can never be more than what is
252 * allowed to be written during a period.
253 *
254 * We could rarely end up in the case that the overflow of the
255 * last write in the period even overflows the entire next period.
256 * If so we put this into the remainingOverFlow and put this into
257 * m_overflow_disk_write (in this case nothing will be written in
258 * this period so ready_to_write need not worry about this case
259 * when setting m_overflow_disk_write since it isn't written any time
260 * in this case and in all other cases only written by the last write
261 * in a period.
262 */
263 Uint32 overflowThisPeriod = MIN(m_overflow_disk_write,
264 m_curr_disk_write_speed + 1);
265
266 /* How much overflow remains after this period? */
267 Uint32 remainingOverFlow = m_overflow_disk_write - overflowThisPeriod;
268
269 if (overflowThisPeriod)
270 {
271 jam();
272 #ifdef DEBUG_CHECKPOINTSPEED
273 ndbout_c("Overflow of %u bytes (max/period is %u bytes)",
274 overflowThisPeriod * 4, m_curr_disk_write_speed * 4);
275 #endif
276 if (remainingOverFlow)
277 {
278 jam();
279 #ifdef DEBUG_CHECKPOINTSPEED
280 ndbout_c(" Extra overflow : %u bytes, will take %u further periods"
281 " to clear", remainingOverFlow * 4,
282 remainingOverFlow / m_curr_disk_write_speed);
283 #endif
284 }
285 }
286 m_words_written_this_period = overflowThisPeriod;
287 m_overflow_disk_write = remainingOverFlow;
288 }
289
290 void
calculate_next_delay(const NDB_TICKS curr_time)291 Backup::calculate_next_delay(const NDB_TICKS curr_time)
292 {
293 /**
294 * Adjust for upto 10 millisecond delay of this signal. Longer
295 * delays will not be handled, in this case the system is most
296 * likely under too high load and it won't matter very much that
297 * we decrease the speed of checkpoints.
298 *
299 * We use a technique where we allow an overflow write in one
300 * period. This overflow will be removed from the next period
301 * such that the load will at average be as specified.
302 * Calculate new delay time based on if we overslept or underslept
303 * this time. We will never regulate more than 10ms, if the
304 * oversleep is bigger than we will simply ignore it. We will
305 * decrease the delay by as much as we overslept or increase it by
306 * as much as we underslept.
307 */
308 int delay_time = m_reset_delay_used;
309 int sig_delay = int(NdbTick_Elapsed(m_reset_disk_speed_time,
310 curr_time).milliSec());
311 if (sig_delay > delay_time + 10)
312 {
313 delay_time = Backup::DISK_SPEED_CHECK_DELAY - 10;
314 }
315 else if (sig_delay < delay_time - 10)
316 {
317 delay_time = Backup::DISK_SPEED_CHECK_DELAY + 10;
318 }
319 else
320 {
321 delay_time = Backup::DISK_SPEED_CHECK_DELAY -
322 (sig_delay - delay_time);
323 }
324 m_periods_passed_in_monitor_period++;
325 m_reset_delay_used= delay_time;
326 m_reset_disk_speed_time = curr_time;
327 #if 0
328 ndbout << "Signal delay was = " << sig_delay;
329 ndbout << " Current time = " << curr_time << endl;
330 ndbout << " Delay time will be = " << delay_time << endl << endl;
331 #endif
332 }
333
334 void
report_disk_write_speed_report(Uint64 bytes_written_this_period,Uint64 millis_passed)335 Backup::report_disk_write_speed_report(Uint64 bytes_written_this_period,
336 Uint64 millis_passed)
337 {
338 Uint32 report = next_disk_write_speed_report;
339 disk_write_speed_rep[report].backup_lcp_bytes_written =
340 bytes_written_this_period;
341 disk_write_speed_rep[report].millis_passed =
342 millis_passed;
343 disk_write_speed_rep[report].redo_bytes_written =
344 c_lqh->report_redo_written_bytes();
345 disk_write_speed_rep[report].target_disk_write_speed =
346 m_curr_disk_write_speed * CURR_DISK_SPEED_CONVERSION_FACTOR_TO_SECONDS;
347
348 next_disk_write_speed_report++;
349 if (next_disk_write_speed_report == DISK_WRITE_SPEED_REPORT_SIZE)
350 {
351 next_disk_write_speed_report = 0;
352 }
353 if (next_disk_write_speed_report == last_disk_write_speed_report)
354 {
355 last_disk_write_speed_report++;
356 if (last_disk_write_speed_report == DISK_WRITE_SPEED_REPORT_SIZE)
357 {
358 last_disk_write_speed_report = 0;
359 }
360 }
361 }
362
363 /**
364 * This method is a check that we haven't been writing faster than we're
365 * supposed to during the last interval.
366 */
367 void
monitor_disk_write_speed(const NDB_TICKS curr_time,const Uint64 millisPassed)368 Backup::monitor_disk_write_speed(const NDB_TICKS curr_time,
369 const Uint64 millisPassed)
370 {
371 /**
372 * Independent check of DiskCheckpointSpeed.
373 * We check every second or so that we are roughly sticking
374 * to our diet.
375 */
376 jam();
377 const Uint64 periodsPassed =
378 (millisPassed / DISK_SPEED_CHECK_DELAY) + 1;
379 const Uint64 quotaWordsPerPeriod = m_curr_disk_write_speed;
380 const Uint64 maxOverFlowWords = c_defaults.m_maxWriteSize / 4;
381 const Uint64 maxExpectedWords = (periodsPassed * quotaWordsPerPeriod) +
382 maxOverFlowWords;
383
384 if (unlikely(m_monitor_words_written > maxExpectedWords))
385 {
386 jam();
387 /**
388 * In the last monitoring interval, we have written more words
389 * than allowed by the quota (DiskCheckpointSpeed), including
390 * transient spikes due to a single MaxBackupWriteSize write
391 */
392 ndbout << "Backup : Excessive Backup/LCP write rate in last"
393 << " monitoring period - recorded = "
394 << (m_monitor_words_written * 4 * 1000) / millisPassed
395 << " bytes/s, "
396 << endl
397 << "Current speed is = "
398 << m_curr_disk_write_speed *
399 CURR_DISK_SPEED_CONVERSION_FACTOR_TO_SECONDS
400 << " bytes/s"
401 << endl;
402 ndbout << "Backup : Monitoring period : " << millisPassed
403 << " millis. Bytes written : " << (m_monitor_words_written * 4)
404 << ". Max allowed : " << (maxExpectedWords * 4) << endl;
405 ndbout << "Actual number of periods in this monitoring interval: ";
406 ndbout << m_periods_passed_in_monitor_period;
407 ndbout << " calculated number was: " << periodsPassed << endl;
408 }
409 report_disk_write_speed_report(4 * m_monitor_words_written, millisPassed);
410 m_monitor_words_written = 0;
411 m_periods_passed_in_monitor_period = 0;
412 m_monitor_snapshot_start = curr_time;
413 }
414
415 void
adjust_disk_write_speed_down(int adjust_speed)416 Backup::adjust_disk_write_speed_down(int adjust_speed)
417 {
418 m_curr_disk_write_speed -= adjust_speed;
419 if (m_curr_disk_write_speed < c_defaults.m_disk_write_speed_min)
420 {
421 disk_write_speed_set_to_min++;
422 m_curr_disk_write_speed = c_defaults.m_disk_write_speed_min;
423 }
424 }
425
426 void
adjust_disk_write_speed_up(int adjust_speed)427 Backup::adjust_disk_write_speed_up(int adjust_speed)
428 {
429 Uint64 max_disk_write_speed = m_is_any_node_restarting ?
430 c_defaults.m_disk_write_speed_max_other_node_restart :
431 c_defaults.m_disk_write_speed_max;
432
433 m_curr_disk_write_speed += adjust_speed;
434 if (m_curr_disk_write_speed > max_disk_write_speed)
435 {
436 m_curr_disk_write_speed = max_disk_write_speed;
437 }
438 }
439
440 /**
441 * Calculate new disk checkpoint write speed based on the new
442 * multiplication factor, we decrease in steps of 10% per second
443 */
444 void
calculate_disk_write_speed(Signal * signal)445 Backup::calculate_disk_write_speed(Signal *signal)
446 {
447 Uint64 max_disk_write_speed = m_is_any_node_restarting ?
448 c_defaults.m_disk_write_speed_max_other_node_restart :
449 c_defaults.m_disk_write_speed_max;
450 /**
451 * Calculate the max - min and divide by 12 to get the adjustment parameter
452 * which is 8% of max - min. We will never adjust faster than this to avoid
453 * to quick adaptiveness. For adjustments down we will adapt faster for IO
454 * lags, for CPU speed we will adapt a bit slower dependent on how high
455 * the CPU load is.
456 */
457 int diff_disk_write_speed =
458 max_disk_write_speed -
459 c_defaults.m_disk_write_speed_min;
460
461 int adjust_speed_up = diff_disk_write_speed / 12;
462 int adjust_speed_down_high = diff_disk_write_speed / 7;
463 int adjust_speed_down_medium = diff_disk_write_speed / 10;
464 int adjust_speed_down_low = diff_disk_write_speed / 14;
465
466 jam();
467 if (diff_disk_write_speed <= 0 ||
468 adjust_speed_up == 0)
469 {
470 jam();
471 /**
472 * The min == max which gives no room to adapt the LCP speed.
473 * or the difference is too small to adapt it.
474 */
475 return;
476 }
477 if (!m_our_node_started)
478 {
479 /* No adaptiveness while we're still starting. */
480 jam();
481 return;
482 }
483 if (c_lqh->is_ldm_instance_io_lagging())
484 {
485 /**
486 * With IO lagging behind we will decrease the LCP speed to accomodate
487 * for more REDO logging bandwidth. The definition of REDO log IO lagging
488 * is kept in DBLQH, but will be a number of seconds of outstanding REDO
489 * IO requests that LQH is still waiting for completion of.
490 * This is a harder condition, so here we will immediately slow down fast.
491 */
492 jam();
493 slowdowns_due_to_io_lag++;
494 adjust_disk_write_speed_down(adjust_speed_down_high);
495 }
496 else
497 {
498 /**
499 * Get CPU usage of this LDM thread during last second.
500 * If CPU usage is over or equal to 95% we will decrease the LCP speed
501 * If CPU usage is below 90% we will increase the LCP speed
502 * one more step. Otherwise we will keep it where it currently is.
503 *
504 * The speed of writing backups and LCPs are fairly linear to the
505 * amount of bytes written. So e.g. writing 10 MByte/second gives
506 * roughly about 10% CPU usage in one CPU. So by writing less we have a
507 * more or less linear decrease of CPU usage. Naturally the speed of
508 * writing is very much coupled to the CPU speed. CPUs today have all
509 * sorts of power save magic, but this algorithm doesn't kick in until
510 * we're at very high CPU loads where we won't be in power save mode.
511 * Obviously it also works in the opposite direction that we can easily
512 * speed up things when the CPU is less used.
513 *
514 * One complication of this algorithm is that we only measure the thread
515 * CPU usage, so we don't really know here the level of CPU usage in total
516 * of the system. Getting this information is quite complex and can
517 * quickly change if the user is also using the machine for many other
518 * things. In this case the algorithm will simply go up to the current
519 * maximum value. So it will work much the same as before this algorithm
520 * was put in place with the maximum value as the new DiskCheckpointSpeed
521 * parameter.
522 *
523 * The algorithm will work best in cases where the user has locked the
524 * thread to one or more CPUs and ensures that the thread can always run
525 * by not allocating more than one thread per CPU.
526 *
527 * The reason we put the CPU usage limits fairly high is that the LDM
528 * threads become more and more efficient as loads goes up. The reason
529 * for this is that as more and more signals are executed in each loop
530 * before checking for new signals. This means that as load goes up we
531 * spend more and more time doing useful work. At low loads we spend a
532 * significant time simply waiting for new signals to arrive and going to
533 * sleep and waking up. So being at 95% load still means that we have
534 * a bit more than 5% capacity left and even being at 90% means we
535 * might have as much as 20% more capacity to use.
536 */
537 jam();
538 EXECUTE_DIRECT(THRMAN,
539 GSN_GET_CPU_USAGE_REQ,
540 signal,
541 1,
542 getThrmanInstance());
543 Uint32 cpu_usage = signal->theData[0];
544 if (cpu_usage < 90)
545 {
546 jamEntry();
547 adjust_disk_write_speed_up(adjust_speed_up);
548 }
549 else if (cpu_usage < 95)
550 {
551 jamEntry();
552 }
553 else if (cpu_usage < 97)
554 {
555 jamEntry();
556 /* 95-96% load, slightly slow down */
557 slowdowns_due_to_high_cpu++;
558 adjust_disk_write_speed_down(adjust_speed_down_low);
559 }
560 else if (cpu_usage < 99)
561 {
562 jamEntry();
563 /* 97-98% load, slow down */
564 slowdowns_due_to_high_cpu++;
565 adjust_disk_write_speed_down(adjust_speed_down_medium);
566 }
567 else
568 {
569 jamEntry();
570 /* 99-100% load, slow down a bit faster */
571 slowdowns_due_to_high_cpu++;
572 adjust_disk_write_speed_down(adjust_speed_down_high);
573 }
574 }
575 }
576
577 void
send_next_reset_disk_speed_counter(Signal * signal)578 Backup::send_next_reset_disk_speed_counter(Signal *signal)
579 {
580 signal->theData[0] = BackupContinueB::RESET_DISK_SPEED_COUNTER;
581 sendSignalWithDelay(reference(),
582 GSN_CONTINUEB,
583 signal,
584 m_reset_delay_used,
585 1);
586 return;
587 }
588
589 void
execCHECK_NODE_RESTARTCONF(Signal * signal)590 Backup::execCHECK_NODE_RESTARTCONF(Signal *signal)
591 {
592 bool old_is_any_node_restarting = m_is_any_node_restarting;
593 m_is_any_node_restarting = (signal->theData[0] == 1);
594 if (old_is_any_node_restarting != m_is_any_node_restarting)
595 {
596 if (old_is_any_node_restarting)
597 {
598 g_eventLogger->info("We are adjusting Max Disk Write Speed,"
599 " no restarts ongoing anymore");
600 }
601 else
602 {
603 g_eventLogger->info("We are adjusting Max Disk Write Speed,"
604 " a restart is ongoing now");
605 }
606 }
607 }
608
609 void
execCONTINUEB(Signal * signal)610 Backup::execCONTINUEB(Signal* signal)
611 {
612 jamEntry();
613 const Uint32 Tdata0 = signal->theData[0];
614 const Uint32 Tdata1 = signal->theData[1];
615 const Uint32 Tdata2 = signal->theData[2];
616
617 switch(Tdata0) {
618 case BackupContinueB::RESET_DISK_SPEED_COUNTER:
619 {
620 jam();
621 const NDB_TICKS curr_time = NdbTick_getCurrentTicks();
622 const Uint64 millisPassed =
623 NdbTick_Elapsed(m_monitor_snapshot_start,curr_time).milliSec();
624 if (millisPassed >= 800 && !m_node_restart_check_sent)
625 {
626 /**
627 * Check for node restart ongoing, we will check for it and use
628 * the cached copy of the node restart state when deciding on the
629 * disk checkpoint speed. We will start this check a few intervals
630 * before calculating the new disk checkpoint speed. We will send
631 * such a check once per interval we are changing disk checkpoint
632 * speed.
633 *
634 * So we call DIH asynchronously here after 800ms have passed such
635 * that when 1000 ms have passed and we will check disk speeds we
636 * have information about if there is a node restart ongoing or not.
637 * This information will only affect disk write speed, so it's not
638 * a problem to rely on up to 200ms old information.
639 */
640 jam();
641 m_node_restart_check_sent = true;
642 signal->theData[0] = reference();
643 sendSignal(DBDIH_REF, GSN_CHECK_NODE_RESTARTREQ, signal, 1, JBB);
644 }
645 /**
646 * We check for millis passed larger than 989 to handle the situation
647 * when we wake up slightly too early. Since we only wake up once every
648 * 100 millisecond, this should be better than occasionally get intervals
649 * of 1100 milliseconds. All the calculations takes the real interval into
650 * account, so it should not corrupt any data.
651 */
652 if (millisPassed > 989)
653 {
654 jam();
655 m_node_restart_check_sent = false;
656 monitor_disk_write_speed(curr_time, millisPassed);
657 calculate_disk_write_speed(signal);
658 }
659 handle_overflow();
660 calculate_next_delay(curr_time);
661 send_next_reset_disk_speed_counter(signal);
662 break;
663 }
664 case BackupContinueB::BACKUP_FRAGMENT_INFO:
665 {
666 jam();
667 const Uint32 ptr_I = Tdata1;
668 Uint32 tabPtr_I = Tdata2;
669 Uint32 fragPtr_I = signal->theData[3];
670
671 BackupRecordPtr ptr;
672 c_backupPool.getPtr(ptr, ptr_I);
673 TablePtr tabPtr;
674 ptr.p->tables.getPtr(tabPtr, tabPtr_I);
675
676 if (fragPtr_I != tabPtr.p->fragments.getSize())
677 {
678 jam();
679 FragmentPtr fragPtr;
680 tabPtr.p->fragments.getPtr(fragPtr, fragPtr_I);
681
682 BackupFilePtr filePtr;
683 ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
684
685 const Uint32 sz = sizeof(BackupFormat::CtlFile::FragmentInfo) >> 2;
686 Uint32 * dst;
687 if (!filePtr.p->operation.dataBuffer.getWritePtr(&dst, sz))
688 {
689 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal,
690 WaitDiskBufferCapacityMillis, 4);
691 return;
692 }
693
694 BackupFormat::CtlFile::FragmentInfo * fragInfo =
695 (BackupFormat::CtlFile::FragmentInfo*)dst;
696 fragInfo->SectionType = htonl(BackupFormat::FRAGMENT_INFO);
697 fragInfo->SectionLength = htonl(sz);
698 fragInfo->TableId = htonl(fragPtr.p->tableId);
699 fragInfo->FragmentNo = htonl(fragPtr_I);
700 fragInfo->NoOfRecordsLow = htonl((Uint32)(fragPtr.p->noOfRecords & 0xFFFFFFFF));
701 fragInfo->NoOfRecordsHigh = htonl((Uint32)(fragPtr.p->noOfRecords >> 32));
702 fragInfo->FilePosLow = htonl(0);
703 fragInfo->FilePosHigh = htonl(0);
704
705 filePtr.p->operation.dataBuffer.updateWritePtr(sz);
706
707 fragPtr_I++;
708 }
709
710 if (fragPtr_I == tabPtr.p->fragments.getSize())
711 {
712 BackupLockTab *req = (BackupLockTab *)signal->getDataPtrSend();
713 req->m_senderRef = reference();
714 req->m_tableId = tabPtr.p->tableId;
715 req->m_lock_unlock = BackupLockTab::UNLOCK_TABLE;
716 req->m_backup_state = BackupLockTab::BACKUP_FRAGMENT_INFO;
717 req->m_backupRecordPtr_I = ptr_I;
718 req->m_tablePtr_I = tabPtr_I;
719 sendSignal(DBDICT_REF, GSN_BACKUP_LOCK_TAB_REQ, signal,
720 BackupLockTab::SignalLength, JBB);
721 return;
722 }
723
724 signal->theData[0] = BackupContinueB::BACKUP_FRAGMENT_INFO;
725 signal->theData[1] = ptr_I;
726 signal->theData[2] = tabPtr_I;
727 signal->theData[3] = fragPtr_I;
728 sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
729 return;
730 }
731 case BackupContinueB::START_FILE_THREAD:
732 case BackupContinueB::BUFFER_UNDERFLOW:
733 {
734 jam();
735 BackupFilePtr filePtr;
736 c_backupFilePool.getPtr(filePtr, Tdata1);
737 checkFile(signal, filePtr);
738 return;
739 }
740 break;
741 case BackupContinueB::BUFFER_FULL_SCAN:
742 {
743 jam();
744 BackupFilePtr filePtr;
745 BackupRecordPtr ptr;
746 c_backupFilePool.getPtr(filePtr, Tdata1);
747 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
748 /**
749 * Given that we've been waiting a few milliseconds for buffers to become
750 * free, we need to initialise the priority mode algorithm to ensure that
751 * we select the correct priority mode.
752 *
753 * We get the number of jobs waiting at B-level to assess the current
754 * activity level to get a new starting point of the algorithm.
755 * Any load level below 16 signals in the buffer we ignore, if we have
756 * a higher level we provide a value that will ensure that we most likely
757 * will start at A-level.
758 */
759 init_scan_prio_level(signal, ptr);
760 checkScan(signal, ptr, filePtr);
761 return;
762 }
763 break;
764 case BackupContinueB::BUFFER_FULL_FRAG_COMPLETE:
765 {
766 jam();
767 BackupFilePtr filePtr;
768 c_backupFilePool.getPtr(filePtr, Tdata1);
769 fragmentCompleted(signal, filePtr);
770 return;
771 }
772 break;
773 case BackupContinueB::BUFFER_FULL_META:
774 {
775 jam();
776 BackupRecordPtr ptr;
777 c_backupPool.getPtr(ptr, Tdata1);
778
779 BackupFilePtr filePtr;
780 ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
781 FsBuffer & buf = filePtr.p->operation.dataBuffer;
782
783 if(buf.getFreeSize() < buf.getMaxWrite()) {
784 jam();
785 TablePtr tabPtr;
786 c_tablePool.getPtr(tabPtr, Tdata2);
787
788 DEBUG_OUT("Backup - Buffer full - "
789 << buf.getFreeSize()
790 << " < " << buf.getMaxWrite()
791 << " (sz: " << buf.getUsableSize()
792 << " getMinRead: " << buf.getMinRead()
793 << ") - tableId = " << tabPtr.p->tableId);
794
795 signal->theData[0] = BackupContinueB::BUFFER_FULL_META;
796 signal->theData[1] = Tdata1;
797 signal->theData[2] = Tdata2;
798 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal,
799 WaitDiskBufferCapacityMillis, 3);
800 return;
801 }//if
802
803 TablePtr tabPtr;
804 c_tablePool.getPtr(tabPtr, Tdata2);
805 GetTabInfoReq * req = (GetTabInfoReq *)signal->getDataPtrSend();
806 req->senderRef = reference();
807 req->senderData = ptr.i;
808 req->requestType = GetTabInfoReq::RequestById |
809 GetTabInfoReq::LongSignalConf;
810 req->tableId = tabPtr.p->tableId;
811 req->schemaTransId = 0;
812 sendSignal(DBDICT_REF, GSN_GET_TABINFOREQ, signal,
813 GetTabInfoReq::SignalLength, JBB);
814 return;
815 }
816 case BackupContinueB::ZDELAY_SCAN_NEXT:
817 if (ERROR_INSERTED(10039))
818 {
819 jam();
820 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal, 300,
821 signal->getLength());
822 return;
823 }
824 else
825 {
826 jam();
827 CLEAR_ERROR_INSERT_VALUE;
828 ndbout_c("Resuming backup");
829
830 Uint32 filePtr_I = Tdata1;
831 BackupFilePtr filePtr;
832 c_backupFilePool.getPtr(filePtr, filePtr_I);
833 BackupRecordPtr ptr;
834 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
835 TablePtr tabPtr;
836 findTable(ptr, tabPtr, filePtr.p->tableId);
837 FragmentPtr fragPtr;
838 tabPtr.p->fragments.getPtr(fragPtr, filePtr.p->fragmentNo);
839
840 BlockReference lqhRef = 0;
841 if (ptr.p->is_lcp()) {
842 lqhRef = calcInstanceBlockRef(DBLQH);
843 } else {
844 const Uint32 instanceKey = fragPtr.p->lqhInstanceKey;
845 ndbrequire(instanceKey != 0);
846 lqhRef = numberToRef(DBLQH, instanceKey, getOwnNodeId());
847 }
848
849 memmove(signal->theData, signal->theData + 2,
850 4*ScanFragNextReq::SignalLength);
851
852 sendSignal(lqhRef, GSN_SCAN_NEXTREQ, signal,
853 ScanFragNextReq::SignalLength, JBB);
854 return ;
855 }
856 default:
857 ndbrequire(0);
858 }//switch
859 }
860
861 void
execBACKUP_LOCK_TAB_CONF(Signal * signal)862 Backup::execBACKUP_LOCK_TAB_CONF(Signal *signal)
863 {
864 jamEntry();
865 const BackupLockTab *conf = (const BackupLockTab *)signal->getDataPtrSend();
866 BackupRecordPtr ptr;
867 c_backupPool.getPtr(ptr, conf->m_backupRecordPtr_I);
868 TablePtr tabPtr;
869 ptr.p->tables.getPtr(tabPtr, conf->m_tablePtr_I);
870
871 switch(conf->m_backup_state) {
872 case BackupLockTab::BACKUP_FRAGMENT_INFO:
873 {
874 jam();
875 ptr.p->tables.next(tabPtr);
876 if (tabPtr.i == RNIL)
877 {
878 jam();
879 closeFiles(signal, ptr);
880 return;
881 }
882
883 signal->theData[0] = BackupContinueB::BACKUP_FRAGMENT_INFO;
884 signal->theData[1] = ptr.i;
885 signal->theData[2] = tabPtr.i;
886 signal->theData[3] = 0; // Start from first fragment of next table
887 sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
888 return;
889 }
890 case BackupLockTab::GET_TABINFO_CONF:
891 {
892 jam();
893 if (conf->errorCode)
894 {
895 jam();
896 defineBackupRef(signal, ptr, conf->errorCode);
897 return;
898 }
899
900 ptr.p->tables.next(tabPtr);
901 afterGetTabinfoLockTab(signal, ptr, tabPtr);
902 return;
903 }
904 case BackupLockTab::CLEANUP:
905 {
906 jam();
907 ptr.p->tables.next(tabPtr);
908 cleanupNextTable(signal, ptr, tabPtr);
909 return;
910 }
911 default:
912 ndbrequire(false);
913 }
914 }
915
916 void
execBACKUP_LOCK_TAB_REF(Signal * signal)917 Backup::execBACKUP_LOCK_TAB_REF(Signal *signal)
918 {
919 jamEntry();
920 ndbrequire(false /* Not currently possible. */);
921 }
922
get_new_speed_val64(Signal * signal)923 Uint64 Backup::get_new_speed_val64(Signal *signal)
924 {
925 if (signal->length() == 3)
926 {
927 jam();
928 Uint64 val = Uint64(signal->theData[1]);
929 val <<= 32;
930 val += Uint64(signal->theData[2]);
931 return val;
932 }
933 else
934 {
935 jam();
936 return 0;
937 }
938 }
939
get_new_speed_val32(Signal * signal)940 Uint64 Backup::get_new_speed_val32(Signal *signal)
941 {
942 if (signal->length() == 2)
943 {
944 jam();
945 return Uint64(signal->theData[1]);
946 }
947 else
948 {
949 jam();
950 return 0;
951 }
952 }
953
954 void
execDUMP_STATE_ORD(Signal * signal)955 Backup::execDUMP_STATE_ORD(Signal* signal)
956 {
957 jamEntry();
958
959 /* Dump commands used in public interfaces */
960 switch (signal->theData[0]) {
961 case DumpStateOrd::BackupStatus:
962 {
963 /* See code in BackupProxy.cpp as well */
964 BlockReference result_ref = CMVMI_REF;
965 if (signal->length() == 2)
966 result_ref = signal->theData[1];
967
968 BackupRecordPtr ptr;
969 int reported = 0;
970 for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr))
971 {
972 if (!ptr.p->is_lcp())
973 {
974 reportStatus(signal, ptr, result_ref);
975 reported++;
976 }
977 }
978 if (!reported)
979 reportStatus(signal, ptr, result_ref);
980 return;
981 }
982 case DumpStateOrd::BackupMinWriteSpeed32:
983 {
984 jam();
985 Uint64 new_val = get_new_speed_val32(signal);
986 if (new_val < Uint64(1024*1024))
987 {
988 jam();
989 g_eventLogger->info("Use: DUMP 100001 MinDiskWriteSpeed");
990 return;
991 }
992 restore_disk_write_speed_numbers();
993 c_defaults.m_disk_write_speed_min = new_val;
994 calculate_real_disk_write_speed_parameters();
995 return;
996 }
997 case DumpStateOrd::BackupMaxWriteSpeed32:
998 {
999 jam();
1000 Uint64 new_val = get_new_speed_val32(signal);
1001 if (new_val < Uint64(1024*1024))
1002 {
1003 jam();
1004 g_eventLogger->info("Use: DUMP 100002 MaxDiskWriteSpeed");
1005 return;
1006 }
1007 restore_disk_write_speed_numbers();
1008 c_defaults.m_disk_write_speed_max = new_val;
1009 calculate_real_disk_write_speed_parameters();
1010 return;
1011 }
1012 case DumpStateOrd::BackupMaxWriteSpeedOtherNodeRestart32:
1013 {
1014 jam();
1015 Uint64 new_val = get_new_speed_val32(signal);
1016 if (new_val < Uint64(1024*1024))
1017 {
1018 jam();
1019 g_eventLogger->info("Use: DUMP 100003 MaxDiskWriteSpeedOtherNodeRestart");
1020 return;
1021 }
1022 restore_disk_write_speed_numbers();
1023 c_defaults.m_disk_write_speed_max_other_node_restart = new_val;
1024 calculate_real_disk_write_speed_parameters();
1025 return;
1026 }
1027 case DumpStateOrd::BackupMinWriteSpeed64:
1028 {
1029 jam();
1030 Uint64 new_val = get_new_speed_val64(signal);
1031 if (new_val < Uint64(1024*1024))
1032 {
1033 jam();
1034 g_eventLogger->info("Use: DUMP 100004 MinDiskWriteSpeed(MSB) "
1035 "MinDiskWriteSpeed(LSB)");
1036 return;
1037 }
1038 restore_disk_write_speed_numbers();
1039 c_defaults.m_disk_write_speed_min = new_val;
1040 calculate_real_disk_write_speed_parameters();
1041 return;
1042 }
1043 case DumpStateOrd::BackupMaxWriteSpeed64:
1044 {
1045 jam();
1046 Uint64 new_val = get_new_speed_val64(signal);
1047 if (new_val < Uint64(1024*1024))
1048 {
1049 jam();
1050 g_eventLogger->info("Use: DUMP 100005 MaxDiskWriteSpeed(MSB) "
1051 "MaxDiskWriteSpeed(LSB)");
1052 return;
1053 }
1054 restore_disk_write_speed_numbers();
1055 c_defaults.m_disk_write_speed_max = new_val;
1056 calculate_real_disk_write_speed_parameters();
1057 return;
1058 }
1059 case DumpStateOrd::BackupMaxWriteSpeedOtherNodeRestart64:
1060 {
1061 jam();
1062 Uint64 new_val = get_new_speed_val64(signal);
1063 if (new_val < Uint64(1024*1024))
1064 {
1065 jam();
1066 g_eventLogger->info("Use: DUMP 100006"
1067 " MaxDiskWriteSpeedOtherNodeRestart(MSB)"
1068 " MaxDiskWriteSpeedOtherNodeRestart(LSB)");
1069 return;
1070 }
1071 restore_disk_write_speed_numbers();
1072 c_defaults.m_disk_write_speed_max_other_node_restart = new_val;
1073 calculate_real_disk_write_speed_parameters();
1074 return;
1075 }
1076 default:
1077 /* continue to debug section */
1078 break;
1079 }
1080
1081 /* Debugging or unclassified section */
1082
1083 if(signal->theData[0] == 20){
1084 if(signal->length() > 1){
1085 c_defaults.m_dataBufferSize = (signal->theData[1] * 1024 * 1024);
1086 }
1087 if(signal->length() > 2){
1088 c_defaults.m_logBufferSize = (signal->theData[2] * 1024 * 1024);
1089 }
1090 if(signal->length() > 3){
1091 c_defaults.m_minWriteSize = signal->theData[3] * 1024;
1092 }
1093 if(signal->length() > 4){
1094 c_defaults.m_maxWriteSize = signal->theData[4] * 1024;
1095 }
1096
1097 infoEvent("Backup: data: %d log: %d min: %d max: %d",
1098 c_defaults.m_dataBufferSize,
1099 c_defaults.m_logBufferSize,
1100 c_defaults.m_minWriteSize,
1101 c_defaults.m_maxWriteSize);
1102 return;
1103 }
1104 if(signal->theData[0] == 21){
1105 BackupReq * req = (BackupReq*)signal->getDataPtrSend();
1106 req->senderData = 23;
1107 req->backupDataLen = 0;
1108 sendSignal(reference(), GSN_BACKUP_REQ,signal,BackupReq::SignalLength, JBB);
1109 startTime = NdbTick_getCurrentTicks();
1110 return;
1111 }
1112
1113 if(signal->theData[0] == 22){
1114 const Uint32 seq = signal->theData[1];
1115 FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend();
1116 req->userReference = reference();
1117 req->userPointer = 23;
1118 req->directory = 1;
1119 req->ownDirectory = 1;
1120 FsOpenReq::setVersion(req->fileNumber, 2);
1121 FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL);
1122 FsOpenReq::v2_setSequence(req->fileNumber, seq);
1123 FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId());
1124 sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal,
1125 FsRemoveReq::SignalLength, JBA);
1126 return;
1127 }
1128
1129 if(signal->theData[0] == 23){
1130 /**
1131 * Print records
1132 */
1133 BackupRecordPtr ptr;
1134 for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr)){
1135 infoEvent("BackupRecord %d: BackupId: %u MasterRef: %x ClientRef: %x",
1136 ptr.i, ptr.p->backupId, ptr.p->masterRef, ptr.p->clientRef);
1137 infoEvent(" State: %d", ptr.p->slaveState.getState());
1138 BackupFilePtr filePtr;
1139 for(ptr.p->files.first(filePtr); filePtr.i != RNIL;
1140 ptr.p->files.next(filePtr)){
1141 jam();
1142 infoEvent(" file %d: type: %d flags: H'%x",
1143 filePtr.i, filePtr.p->fileType,
1144 filePtr.p->m_flags);
1145 }
1146 }
1147
1148 const NDB_TICKS now = NdbTick_getCurrentTicks();
1149 const Uint64 resetElapsed = NdbTick_Elapsed(m_reset_disk_speed_time,now).milliSec();
1150 const Uint64 millisPassed = NdbTick_Elapsed(m_monitor_snapshot_start,now).milliSec();
1151 /* Dump measured disk write speed since last RESET_DISK_SPEED */
1152 ndbout_c("m_curr_disk_write_speed: %ukb m_words_written_this_period:"
1153 " %ukwords m_overflow_disk_write: %ukb",
1154 Uint32(4 * m_curr_disk_write_speed / 1024),
1155 Uint32(m_words_written_this_period / 1024),
1156 Uint32(m_overflow_disk_write / 1024));
1157 ndbout_c("m_reset_delay_used: %u time since last RESET_DISK_SPEED: %llu millis",
1158 m_reset_delay_used, resetElapsed);
1159 /* Dump measured rate since last snapshot start */
1160 Uint64 byteRate = (4000 * m_monitor_words_written) / (millisPassed + 1);
1161 ndbout_c("m_monitor_words_written : %llu, duration : %llu millis, rate :"
1162 " %llu bytes/s : (%u pct of config)",
1163 m_monitor_words_written, millisPassed,
1164 byteRate,
1165 (Uint32) ((100 * byteRate / (4 * 10)) /
1166 (m_curr_disk_write_speed + 1)));
1167
1168 for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr))
1169 {
1170 ndbout_c("BackupRecord %u: BackupId: %u MasterRef: %x ClientRef: %x",
1171 ptr.i, ptr.p->backupId, ptr.p->masterRef, ptr.p->clientRef);
1172 ndbout_c(" State: %u", ptr.p->slaveState.getState());
1173 ndbout_c(" noOfByte: %llu noOfRecords: %llu",
1174 ptr.p->noOfBytes, ptr.p->noOfRecords);
1175 ndbout_c(" noOfLogBytes: %llu noOfLogRecords: %llu",
1176 ptr.p->noOfLogBytes, ptr.p->noOfLogRecords);
1177 ndbout_c(" errorCode: %u", ptr.p->errorCode);
1178 BackupFilePtr filePtr;
1179 for(ptr.p->files.first(filePtr); filePtr.i != RNIL;
1180 ptr.p->files.next(filePtr))
1181 {
1182 ndbout_c(" file %u: type: %u flags: H'%x tableId: %u fragmentId: %u",
1183 filePtr.i, filePtr.p->fileType, filePtr.p->m_flags,
1184 filePtr.p->tableId, filePtr.p->fragmentNo);
1185 }
1186 if (ptr.p->slaveState.getState() == SCANNING && ptr.p->dataFilePtr != RNIL)
1187 {
1188 c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
1189 OperationRecord & op = filePtr.p->operation;
1190 Uint32 *tmp = NULL;
1191 Uint32 sz = 0;
1192 bool eof = FALSE;
1193 bool ready = op.dataBuffer.getReadPtr(&tmp, &sz, &eof);
1194 ndbout_c("ready: %s eof: %s", ready ? "TRUE" : "FALSE", eof ? "TRUE" : "FALSE");
1195 }
1196 }
1197 return;
1198 }
1199 if(signal->theData[0] == 24){
1200 /**
1201 * Print size of records etc.
1202 */
1203 infoEvent("Backup - dump pool sizes");
1204 infoEvent("BackupPool: %d BackupFilePool: %d TablePool: %d",
1205 c_backupPool.getSize(), c_backupFilePool.getSize(),
1206 c_tablePool.getSize());
1207 infoEvent("AttrPool: %d TriggerPool: %d FragmentPool: %d",
1208 c_backupPool.getSize(), c_backupFilePool.getSize(),
1209 c_tablePool.getSize());
1210 infoEvent("PagePool: %d",
1211 c_pagePool.getSize());
1212
1213
1214 if(signal->getLength() == 2 && signal->theData[1] == 2424)
1215 {
1216 /**
1217 * Handle LCP
1218 */
1219 BackupRecordPtr lcp;
1220 ndbrequire(c_backups.first(lcp));
1221
1222 ndbrequire(c_backupPool.getSize() == c_backupPool.getNoOfFree() + 1);
1223 if(lcp.p->tables.isEmpty())
1224 {
1225 ndbrequire(c_tablePool.getSize() == c_tablePool.getNoOfFree());
1226 ndbrequire(c_fragmentPool.getSize() == c_fragmentPool.getNoOfFree());
1227 ndbrequire(c_triggerPool.getSize() == c_triggerPool.getNoOfFree());
1228 }
1229 ndbrequire(c_backupFilePool.getSize() == c_backupFilePool.getNoOfFree() + 1);
1230 BackupFilePtr lcp_file;
1231 c_backupFilePool.getPtr(lcp_file, lcp.p->dataFilePtr);
1232 ndbrequire(c_pagePool.getSize() ==
1233 c_pagePool.getNoOfFree() +
1234 lcp_file.p->pages.getSize());
1235 }
1236 }
1237
1238 if(signal->theData[0] == DumpStateOrd::DumpBackup)
1239 {
1240 /* Display a bunch of stuff about Backup defaults */
1241 infoEvent("Compressed Backup: %d", c_defaults.m_compressed_backup);
1242 infoEvent("Compressed LCP: %d", c_defaults.m_compressed_lcp);
1243 }
1244
1245 if(signal->theData[0] == DumpStateOrd::DumpBackupSetCompressed)
1246 {
1247 c_defaults.m_compressed_backup= signal->theData[1];
1248 infoEvent("Compressed Backup: %d", c_defaults.m_compressed_backup);
1249 }
1250
1251 if(signal->theData[0] == DumpStateOrd::DumpBackupSetCompressedLCP)
1252 {
1253 c_defaults.m_compressed_lcp= signal->theData[1];
1254 infoEvent("Compressed LCP: %d", c_defaults.m_compressed_lcp);
1255 }
1256
1257 if (signal->theData[0] == DumpStateOrd::BackupErrorInsert)
1258 {
1259 if (signal->getLength() == 1)
1260 ndbout_c("BACKUP: setting error %u", signal->theData[1]);
1261 else
1262 ndbout_c("BACKUP: setting error %u, %u",
1263 signal->theData[1], signal->theData[2]);
1264 SET_ERROR_INSERT_VALUE2(signal->theData[1], signal->theData[2]);
1265 }
1266 }
1267
1268 /**
1269 * We are using a round buffer of measurements, to simplify the code we
1270 * use this routing to quickly derive the disk write record from an index
1271 * (how many seconds back we want to check).
1272 */
1273 Uint32
get_disk_write_speed_record(Uint32 start_index)1274 Backup::get_disk_write_speed_record(Uint32 start_index)
1275 {
1276 ndbassert(start_index < DISK_WRITE_SPEED_REPORT_SIZE);
1277 if (next_disk_write_speed_report == last_disk_write_speed_report)
1278 {
1279 /* No speed reports generated yet */
1280 return DISK_WRITE_SPEED_REPORT_SIZE;
1281 }
1282 if (start_index < next_disk_write_speed_report)
1283 {
1284 return (next_disk_write_speed_report - (start_index + 1));
1285 }
1286 else if (last_disk_write_speed_report == 0)
1287 {
1288 /**
1289 * We might still be in inital phase when not all records have
1290 * been written yet.
1291 */
1292 return DISK_WRITE_SPEED_REPORT_SIZE;
1293 }
1294 else
1295 {
1296 return (DISK_WRITE_SPEED_REPORT_SIZE -
1297 ((start_index + 1) - next_disk_write_speed_report));
1298 }
1299 ndbassert(false);
1300 return 0;
1301 }
1302
1303 /**
1304 * Calculates the average speed for a number of seconds back.
1305 * reports the numbers in number of milliseconds that actually
1306 * passed and the number of bytes written in this period.
1307 */
1308 void
calculate_disk_write_speed_seconds_back(Uint32 seconds_back,Uint64 & millis_passed,Uint64 & backup_lcp_bytes_written,Uint64 & redo_bytes_written)1309 Backup::calculate_disk_write_speed_seconds_back(Uint32 seconds_back,
1310 Uint64 & millis_passed,
1311 Uint64 & backup_lcp_bytes_written,
1312 Uint64 & redo_bytes_written)
1313 {
1314 Uint64 millis_back = (MILLIS_IN_A_SECOND * seconds_back) -
1315 MILLIS_ADJUST_FOR_EARLY_REPORT;
1316 Uint32 start_index = 0;
1317
1318 ndbassert(seconds_back > 0);
1319
1320 millis_passed = 0;
1321 backup_lcp_bytes_written = 0;
1322 redo_bytes_written = 0;
1323 jam();
1324 while (millis_passed < millis_back &&
1325 start_index < DISK_WRITE_SPEED_REPORT_SIZE)
1326 {
1327 jam();
1328 Uint32 disk_write_speed_record = get_disk_write_speed_record(start_index);
1329 if (disk_write_speed_record == DISK_WRITE_SPEED_REPORT_SIZE)
1330 break;
1331 millis_passed +=
1332 disk_write_speed_rep[disk_write_speed_record].millis_passed;
1333 backup_lcp_bytes_written +=
1334 disk_write_speed_rep[disk_write_speed_record].backup_lcp_bytes_written;
1335 redo_bytes_written +=
1336 disk_write_speed_rep[disk_write_speed_record].redo_bytes_written;
1337 start_index++;
1338 }
1339 /**
1340 * Always report at least one millisecond to avoid risk of division
1341 * by zero later on in the code.
1342 */
1343 jam();
1344 if (millis_passed == 0)
1345 {
1346 jam();
1347 millis_passed = 1;
1348 }
1349 return;
1350 }
1351
1352 void
calculate_std_disk_write_speed_seconds_back(Uint32 seconds_back,Uint64 millis_passed_total,Uint64 backup_lcp_bytes_written,Uint64 redo_bytes_written,Uint64 & std_dev_backup_lcp_in_bytes_per_sec,Uint64 & std_dev_redo_in_bytes_per_sec)1353 Backup::calculate_std_disk_write_speed_seconds_back(Uint32 seconds_back,
1354 Uint64 millis_passed_total,
1355 Uint64 backup_lcp_bytes_written,
1356 Uint64 redo_bytes_written,
1357 Uint64 & std_dev_backup_lcp_in_bytes_per_sec,
1358 Uint64 & std_dev_redo_in_bytes_per_sec)
1359 {
1360 Uint32 start_index = 0;
1361 Uint64 millis_passed = 0;
1362 Uint64 millis_back = (MILLIS_IN_A_SECOND * seconds_back) -
1363 MILLIS_ADJUST_FOR_EARLY_REPORT;
1364 Uint64 millis_passed_this_period;
1365
1366 Uint64 avg_backup_lcp_bytes_per_milli;
1367 Uint64 backup_lcp_bytes_written_this_period;
1368 Uint64 avg_backup_lcp_bytes_per_milli_this_period;
1369 long double backup_lcp_temp_sum;
1370 long double backup_lcp_square_sum;
1371
1372 Uint64 avg_redo_bytes_per_milli;
1373 Uint64 redo_bytes_written_this_period;
1374 Uint64 avg_redo_bytes_per_milli_this_period;
1375 long double redo_temp_sum;
1376 long double redo_square_sum;
1377
1378 ndbassert(seconds_back > 0);
1379 if (millis_passed_total == 0)
1380 {
1381 jam();
1382 std_dev_backup_lcp_in_bytes_per_sec = 0;
1383 std_dev_redo_in_bytes_per_sec = 0;
1384 return;
1385 }
1386 avg_backup_lcp_bytes_per_milli = backup_lcp_bytes_written /
1387 millis_passed_total;
1388 avg_redo_bytes_per_milli = redo_bytes_written / millis_passed_total;
1389 backup_lcp_square_sum = 0;
1390 redo_square_sum = 0;
1391 jam();
1392 while (millis_passed < millis_back &&
1393 start_index < DISK_WRITE_SPEED_REPORT_SIZE)
1394 {
1395 jam();
1396 Uint32 disk_write_speed_record = get_disk_write_speed_record(start_index);
1397 if (disk_write_speed_record == DISK_WRITE_SPEED_REPORT_SIZE)
1398 break;
1399 millis_passed_this_period =
1400 disk_write_speed_rep[disk_write_speed_record].millis_passed;
1401 backup_lcp_bytes_written_this_period =
1402 disk_write_speed_rep[disk_write_speed_record].backup_lcp_bytes_written;
1403 redo_bytes_written_this_period =
1404 disk_write_speed_rep[disk_write_speed_record].redo_bytes_written;
1405 millis_passed += millis_passed_this_period;
1406
1407 if (millis_passed_this_period != 0)
1408 {
1409 /**
1410 * We use here a calculation of standard deviation that firsts
1411 * calculates the variance. The variance is calculated as the square
1412 * mean of the difference. To get standard intervals we compute the
1413 * average per millisecond and then sum over all milliseconds. To
1414 * simplify the calculation we then multiply the square of the diffs
1415 * per milli to the number of millis passed in a particular measurement.
1416 * We divide by the total number of millis passed. We do this first to
1417 * avoid too big numbers. We use long double in all calculations to
1418 * ensure that we don't overflow.
1419 *
1420 * We also try to avoid divisions by zero in the code in multiple
1421 * places when we query this table before the first measurement have
1422 * been logged.
1423 *
1424 * Calculating standard deviation as:
1425 * Sum of X(i) - E(X) squared where X(i) is the average per millisecond
1426 * in this time period and E(X) is the average over the entire period.
1427 * We divide by number of periods, but to get it more real, we divide
1428 * by total_millis / millis_in_this_period since the periods aren't
1429 * exactly the same. Finally we take square root of the sum of those
1430 * (X(i) - E(X))^2 / #periods. Actually the standard deviation should
1431 * be calculated using #periods - 1 as divisor. Finally we also need
1432 * to convert it from standard deviation per millisecond to standard
1433 * deviation per second. We make that simple by multiplying the
1434 * result from this function by 1000.
1435 */
1436 jam();
1437 avg_backup_lcp_bytes_per_milli_this_period =
1438 backup_lcp_bytes_written_this_period / millis_passed_this_period;
1439 backup_lcp_temp_sum = (long double)avg_backup_lcp_bytes_per_milli;
1440 backup_lcp_temp_sum -=
1441 (long double)avg_backup_lcp_bytes_per_milli_this_period;
1442 backup_lcp_temp_sum *= backup_lcp_temp_sum;
1443 backup_lcp_temp_sum /= (long double)millis_passed_total;
1444 backup_lcp_temp_sum *= (long double)millis_passed_this_period;
1445 backup_lcp_square_sum += backup_lcp_temp_sum;
1446
1447 avg_redo_bytes_per_milli_this_period =
1448 redo_bytes_written_this_period / millis_passed_this_period;
1449 redo_temp_sum = (long double)avg_redo_bytes_per_milli;
1450 redo_temp_sum -= (long double)avg_redo_bytes_per_milli_this_period;
1451 redo_temp_sum *= redo_temp_sum;
1452 redo_temp_sum /= (long double)millis_passed_total;
1453 redo_temp_sum *= (long double)millis_passed_this_period;
1454 redo_square_sum += redo_temp_sum;
1455 }
1456 start_index++;
1457 }
1458 if (millis_passed == 0)
1459 {
1460 jam();
1461 std_dev_backup_lcp_in_bytes_per_sec = 0;
1462 std_dev_redo_in_bytes_per_sec = 0;
1463 return;
1464 }
1465 /**
1466 * Calculate standard deviation per millisecond
1467 * We use long double for the calculation, but we want to report it to
1468 * it in bytes per second, so this is easiest to do with an unsigned
1469 * integer number. Conversion from long double to Uint64 is a real
1470 * conversion that we leave to the compiler to generate code to make.
1471 */
1472 std_dev_backup_lcp_in_bytes_per_sec = (Uint64)sqrtl(backup_lcp_square_sum);
1473 std_dev_redo_in_bytes_per_sec = (Uint64)sqrtl(redo_square_sum);
1474
1475 /**
1476 * Convert to standard deviation per second
1477 * We calculated it in bytes per millisecond, so simple multiplication of
1478 * 1000 is sufficient here.
1479 */
1480 std_dev_backup_lcp_in_bytes_per_sec*= (Uint64)1000;
1481 std_dev_redo_in_bytes_per_sec*= (Uint64)1000;
1482 }
1483
1484 Uint64
calculate_millis_since_finished(Uint32 start_index)1485 Backup::calculate_millis_since_finished(Uint32 start_index)
1486 {
1487 Uint64 millis_passed = 0;
1488 jam();
1489 if (start_index == 0)
1490 {
1491 jam();
1492 return 0;
1493 }
1494 for (Uint32 i = 0; i < start_index; i++)
1495 {
1496 Uint32 disk_write_speed_record = get_disk_write_speed_record(i);
1497 millis_passed +=
1498 disk_write_speed_rep[disk_write_speed_record].millis_passed;
1499 }
1500 return millis_passed;
1501 }
1502
execDBINFO_SCANREQ(Signal * signal)1503 void Backup::execDBINFO_SCANREQ(Signal *signal)
1504 {
1505 jamEntry();
1506 DbinfoScanReq req= *(DbinfoScanReq*)signal->theData;
1507 const Ndbinfo::ScanCursor* cursor =
1508 CAST_CONSTPTR(Ndbinfo::ScanCursor, DbinfoScan::getCursorPtr(&req));
1509
1510 Ndbinfo::Ratelimit rl;
1511
1512 switch(req.tableId){
1513 case Ndbinfo::POOLS_TABLEID:
1514 {
1515 Ndbinfo::pool_entry pools[] =
1516 {
1517 { "Backup Record",
1518 c_backupPool.getUsed(),
1519 c_backupPool.getSize(),
1520 c_backupPool.getEntrySize(),
1521 c_backupPool.getUsedHi(),
1522 { CFG_DB_PARALLEL_BACKUPS,0,0,0 }},
1523 { "Backup File",
1524 c_backupFilePool.getUsed(),
1525 c_backupFilePool.getSize(),
1526 c_backupFilePool.getEntrySize(),
1527 c_backupFilePool.getUsedHi(),
1528 { CFG_DB_PARALLEL_BACKUPS,0,0,0 }},
1529 { "Table",
1530 c_tablePool.getUsed(),
1531 c_tablePool.getSize(),
1532 c_tablePool.getEntrySize(),
1533 c_tablePool.getUsedHi(),
1534 { CFG_DB_PARALLEL_BACKUPS,
1535 CFG_DB_NO_TABLES,
1536 CFG_DB_NO_ORDERED_INDEXES,
1537 CFG_DB_NO_UNIQUE_HASH_INDEXES }},
1538 { "Trigger",
1539 c_triggerPool.getUsed(),
1540 c_triggerPool.getSize(),
1541 c_triggerPool.getEntrySize(),
1542 c_triggerPool.getUsedHi(),
1543 { CFG_DB_PARALLEL_BACKUPS,
1544 CFG_DB_NO_TABLES,
1545 CFG_DB_NO_ORDERED_INDEXES,
1546 CFG_DB_NO_UNIQUE_HASH_INDEXES }},
1547 { "Fragment",
1548 c_fragmentPool.getUsed(),
1549 c_fragmentPool.getSize(),
1550 c_fragmentPool.getEntrySize(),
1551 c_fragmentPool.getUsedHi(),
1552 { CFG_DB_NO_TABLES,
1553 CFG_DB_NO_ORDERED_INDEXES,
1554 CFG_DB_NO_UNIQUE_HASH_INDEXES,0 }},
1555 { "Page",
1556 c_pagePool.getUsed(),
1557 c_pagePool.getSize(),
1558 c_pagePool.getEntrySize(),
1559 c_pagePool.getUsedHi(),
1560 { CFG_DB_BACKUP_MEM,
1561 CFG_DB_BACKUP_DATA_BUFFER_MEM,0,0 }},
1562 { NULL, 0,0,0,0, { 0,0,0,0 }}
1563 };
1564
1565 const size_t num_config_params =
1566 sizeof(pools[0].config_params) / sizeof(pools[0].config_params[0]);
1567 Uint32 pool = cursor->data[0];
1568 BlockNumber bn = blockToMain(number());
1569 while(pools[pool].poolname)
1570 {
1571 jam();
1572 Ndbinfo::Row row(signal, req);
1573 row.write_uint32(getOwnNodeId());
1574 row.write_uint32(bn); // block number
1575 row.write_uint32(instance()); // block instance
1576 row.write_string(pools[pool].poolname);
1577
1578 row.write_uint64(pools[pool].used);
1579 row.write_uint64(pools[pool].total);
1580 row.write_uint64(pools[pool].used_hi);
1581 row.write_uint64(pools[pool].entry_size);
1582 for (size_t i = 0; i < num_config_params; i++)
1583 row.write_uint32(pools[pool].config_params[i]);
1584 ndbinfo_send_row(signal, req, row, rl);
1585 pool++;
1586 if (rl.need_break(req))
1587 {
1588 jam();
1589 ndbinfo_send_scan_break(signal, req, rl, pool);
1590 return;
1591 }
1592 }
1593 break;
1594 }
1595 case Ndbinfo::DISK_WRITE_SPEED_AGGREGATE_TABLEID:
1596 {
1597
1598 jam();
1599 Uint64 backup_lcp_bytes_written;
1600 Uint64 redo_bytes_written;
1601 Uint64 std_dev_backup_lcp;
1602 Uint64 std_dev_redo;
1603 Uint64 millis_passed;
1604 Ndbinfo::Row row(signal, req);
1605 Uint32 ldm_instance = instance();
1606
1607 if (ldm_instance > 0)
1608 {
1609 /* Always start counting instances from 0 */
1610 ldm_instance--;
1611 }
1612 row.write_uint32(getOwnNodeId());
1613 row.write_uint32(ldm_instance);
1614
1615 /* Report last second */
1616 calculate_disk_write_speed_seconds_back(1,
1617 millis_passed,
1618 backup_lcp_bytes_written,
1619 redo_bytes_written);
1620
1621 row.write_uint64((backup_lcp_bytes_written / millis_passed ) * 1000);
1622 row.write_uint64((redo_bytes_written / millis_passed) * 1000);
1623
1624 /* Report average and std_dev of last 10 seconds */
1625 calculate_disk_write_speed_seconds_back(10,
1626 millis_passed,
1627 backup_lcp_bytes_written,
1628 redo_bytes_written);
1629
1630 row.write_uint64((backup_lcp_bytes_written * 1000) / millis_passed);
1631 row.write_uint64((redo_bytes_written * 1000) / millis_passed);
1632
1633 calculate_std_disk_write_speed_seconds_back(10,
1634 millis_passed,
1635 backup_lcp_bytes_written,
1636 redo_bytes_written,
1637 std_dev_backup_lcp,
1638 std_dev_redo);
1639
1640 row.write_uint64(std_dev_backup_lcp);
1641 row.write_uint64(std_dev_redo);
1642
1643 /* Report average and std_dev of last 60 seconds */
1644 calculate_disk_write_speed_seconds_back(60,
1645 millis_passed,
1646 backup_lcp_bytes_written,
1647 redo_bytes_written);
1648
1649 row.write_uint64((backup_lcp_bytes_written / millis_passed ) * 1000);
1650 row.write_uint64((redo_bytes_written / millis_passed) * 1000);
1651
1652 calculate_std_disk_write_speed_seconds_back(60,
1653 millis_passed,
1654 backup_lcp_bytes_written,
1655 redo_bytes_written,
1656 std_dev_backup_lcp,
1657 std_dev_redo);
1658
1659 row.write_uint64(std_dev_backup_lcp);
1660 row.write_uint64(std_dev_redo);
1661
1662 row.write_uint64(slowdowns_due_to_io_lag);
1663 row.write_uint64(slowdowns_due_to_high_cpu);
1664 row.write_uint64(disk_write_speed_set_to_min);
1665 row.write_uint64(m_curr_disk_write_speed *
1666 CURR_DISK_SPEED_CONVERSION_FACTOR_TO_SECONDS);
1667
1668 ndbinfo_send_row(signal, req, row, rl);
1669 break;
1670 }
1671 case Ndbinfo::DISK_WRITE_SPEED_BASE_TABLEID:
1672 {
1673 jam();
1674 Uint32 ldm_instance = instance();
1675
1676 if (ldm_instance > 0)
1677 {
1678 /* Always start counting instances from 0 */
1679 ldm_instance--;
1680 }
1681 Uint32 start_index = cursor->data[0];
1682 for ( ; start_index < DISK_WRITE_SPEED_REPORT_SIZE;)
1683 {
1684 jam();
1685 Ndbinfo::Row row(signal, req);
1686 row.write_uint32(getOwnNodeId());
1687 row.write_uint32(ldm_instance);
1688 Uint32 disk_write_speed_record = get_disk_write_speed_record(start_index);
1689 if (disk_write_speed_record != DISK_WRITE_SPEED_REPORT_SIZE)
1690 {
1691 jam();
1692 Uint64 backup_lcp_bytes_written_this_period =
1693 disk_write_speed_rep[disk_write_speed_record].
1694 backup_lcp_bytes_written;
1695 Uint64 redo_bytes_written_this_period =
1696 disk_write_speed_rep[disk_write_speed_record].
1697 redo_bytes_written;
1698 Uint64 millis_passed_this_period =
1699 disk_write_speed_rep[disk_write_speed_record].millis_passed;
1700 Uint64 millis_since_finished =
1701 calculate_millis_since_finished(start_index);
1702 Uint64 target_disk_write_speed =
1703 disk_write_speed_rep[disk_write_speed_record].target_disk_write_speed;
1704
1705 row.write_uint64(millis_since_finished);
1706 row.write_uint64(millis_passed_this_period);
1707 row.write_uint64(backup_lcp_bytes_written_this_period);
1708 row.write_uint64(redo_bytes_written_this_period);
1709 row.write_uint64(target_disk_write_speed);
1710 }
1711 else
1712 {
1713 jam();
1714 row.write_uint64((Uint64)0);
1715 row.write_uint64((Uint64)0);
1716 row.write_uint64((Uint64)0);
1717 row.write_uint64((Uint64)0);
1718 row.write_uint64((Uint64)0);
1719 }
1720 ndbinfo_send_row(signal, req, row, rl);
1721 start_index++;
1722 if (rl.need_break(req))
1723 {
1724 jam();
1725 ndbinfo_send_scan_break(signal, req, rl, start_index);
1726 return;
1727 }
1728 }
1729 break;
1730 }
1731 default:
1732 break;
1733 }
1734
1735 ndbinfo_send_scan_conf(signal, req, rl);
1736 }
1737
1738 bool
findTable(const BackupRecordPtr & ptr,TablePtr & tabPtr,Uint32 tableId) const1739 Backup::findTable(const BackupRecordPtr & ptr,
1740 TablePtr & tabPtr, Uint32 tableId) const
1741 {
1742 for(ptr.p->tables.first(tabPtr);
1743 tabPtr.i != RNIL;
1744 ptr.p->tables.next(tabPtr)) {
1745 jam();
1746 if(tabPtr.p->tableId == tableId){
1747 jam();
1748 return true;
1749 }//if
1750 }//for
1751 tabPtr.i = RNIL;
1752 tabPtr.p = 0;
1753 return false;
1754 }
1755
xps(Uint64 x,Uint64 ms)1756 static Uint32 xps(Uint64 x, Uint64 ms)
1757 {
1758 float fx = float(x);
1759 float fs = float(ms);
1760
1761 if(ms == 0 || x == 0) {
1762 jamNoBlock();
1763 return 0;
1764 }//if
1765 jamNoBlock();
1766 return ((Uint32)(1000.0f * (fx + fs/2.1f))) / ((Uint32)fs);
1767 }
1768
1769 struct Number {
NumberNumber1770 Number(Uint64 r) { val = r;}
operator =Number1771 Number & operator=(Uint64 r) { val = r; return * this; }
1772 Uint64 val;
1773 };
1774
1775 NdbOut &
operator <<(NdbOut & out,const Number & val)1776 operator<< (NdbOut & out, const Number & val){
1777 char p = 0;
1778 Uint32 loop = 1;
1779 while(val.val > loop){
1780 loop *= 1000;
1781 p += 3;
1782 }
1783 if(loop != 1){
1784 p -= 3;
1785 loop /= 1000;
1786 }
1787
1788 switch(p){
1789 case 0:
1790 break;
1791 case 3:
1792 p = 'k';
1793 break;
1794 case 6:
1795 p = 'M';
1796 break;
1797 case 9:
1798 p = 'G';
1799 break;
1800 default:
1801 p = 0;
1802 }
1803 char str[2];
1804 str[0] = p;
1805 str[1] = 0;
1806 Uint32 tmp = (Uint32)((val.val + (loop >> 1)) / loop);
1807 #if 1
1808 if(p > 0)
1809 out << tmp << str;
1810 else
1811 out << tmp;
1812 #else
1813 out << val.val;
1814 #endif
1815
1816 return out;
1817 }
1818
1819 void
execBACKUP_CONF(Signal * signal)1820 Backup::execBACKUP_CONF(Signal* signal)
1821 {
1822 jamEntry();
1823 BackupConf * conf = (BackupConf*)signal->getDataPtr();
1824
1825 ndbout_c("Backup %u has started", conf->backupId);
1826 }
1827
1828 void
execBACKUP_REF(Signal * signal)1829 Backup::execBACKUP_REF(Signal* signal)
1830 {
1831 jamEntry();
1832 BackupRef * ref = (BackupRef*)signal->getDataPtr();
1833
1834 ndbout_c("Backup (%u) has NOT started %d", ref->senderData, ref->errorCode);
1835 }
1836
1837 void
execBACKUP_COMPLETE_REP(Signal * signal)1838 Backup::execBACKUP_COMPLETE_REP(Signal* signal)
1839 {
1840 jamEntry();
1841 BackupCompleteRep* rep = (BackupCompleteRep*)signal->getDataPtr();
1842
1843 const NDB_TICKS now = NdbTick_getCurrentTicks();
1844 const Uint64 elapsed = NdbTick_Elapsed(startTime,now).milliSec();
1845
1846 ndbout_c("Backup %u has completed", rep->backupId);
1847 const Uint64 bytes =
1848 rep->noOfBytesLow + (((Uint64)rep->noOfBytesHigh) << 32);
1849 const Uint64 records =
1850 rep->noOfRecordsLow + (((Uint64)rep->noOfRecordsHigh) << 32);
1851
1852 Number rps = xps(records, elapsed);
1853 Number bps = xps(bytes, elapsed);
1854
1855 ndbout << " Data [ "
1856 << Number(records) << " rows "
1857 << Number(bytes) << " bytes " << elapsed << " ms ] "
1858 << " => "
1859 << rps << " row/s & " << bps << "b/s" << endl;
1860
1861 bps = xps(rep->noOfLogBytes, elapsed);
1862 rps = xps(rep->noOfLogRecords, elapsed);
1863
1864 ndbout << " Log [ "
1865 << Number(rep->noOfLogRecords) << " log records "
1866 << Number(rep->noOfLogBytes) << " bytes " << elapsed << " ms ] "
1867 << " => "
1868 << rps << " records/s & " << bps << "b/s" << endl;
1869
1870 }
1871
1872 void
execBACKUP_ABORT_REP(Signal * signal)1873 Backup::execBACKUP_ABORT_REP(Signal* signal)
1874 {
1875 jamEntry();
1876 BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtr();
1877
1878 ndbout_c("Backup %u has been aborted %d", rep->backupId, rep->reason);
1879 }
1880
1881 const TriggerEvent::Value triggerEventValues[] = {
1882 TriggerEvent::TE_INSERT,
1883 TriggerEvent::TE_UPDATE,
1884 TriggerEvent::TE_DELETE
1885 };
1886
1887 const Backup::State
1888 Backup::validSlaveTransitions[] = {
1889 INITIAL, DEFINING,
1890 DEFINING, DEFINED,
1891 DEFINED, STARTED,
1892 STARTED, STARTED, // Several START_BACKUP_REQ is sent
1893 STARTED, SCANNING,
1894 SCANNING, STARTED,
1895 STARTED, STOPPING,
1896 STOPPING, CLEANING,
1897 CLEANING, INITIAL,
1898
1899 INITIAL, ABORTING, // Node fail
1900 DEFINING, ABORTING,
1901 DEFINED, ABORTING,
1902 STARTED, ABORTING,
1903 SCANNING, ABORTING,
1904 STOPPING, ABORTING,
1905 CLEANING, ABORTING, // Node fail w/ master takeover
1906 ABORTING, ABORTING, // Slave who initiates ABORT should have this transition
1907
1908 ABORTING, INITIAL,
1909 INITIAL, INITIAL
1910 };
1911
1912 const Uint32
1913 Backup::validSlaveTransitionsCount =
1914 sizeof(Backup::validSlaveTransitions) / sizeof(Backup::State);
1915
1916 void
setState(State newState)1917 Backup::CompoundState::setState(State newState){
1918 bool found = false;
1919 const State currState = state;
1920 for(unsigned i = 0; i<noOfValidTransitions; i+= 2) {
1921 jam();
1922 if(validTransitions[i] == currState &&
1923 validTransitions[i+1] == newState){
1924 jam();
1925 found = true;
1926 break;
1927 }
1928 }
1929
1930 //ndbrequire(found);
1931
1932 if (newState == INITIAL)
1933 abortState = INITIAL;
1934 if(newState == ABORTING && currState != ABORTING) {
1935 jam();
1936 abortState = currState;
1937 }
1938 state = newState;
1939 #ifdef DEBUG_ABORT
1940 if (newState != currState) {
1941 ndbout_c("%u: Old state = %u, new state = %u, abort state = %u",
1942 id, currState, newState, abortState);
1943 }
1944 #endif
1945 }
1946
1947 void
forceState(State newState)1948 Backup::CompoundState::forceState(State newState)
1949 {
1950 const State currState = state;
1951 if (newState == INITIAL)
1952 abortState = INITIAL;
1953 if(newState == ABORTING && currState != ABORTING) {
1954 jam();
1955 abortState = currState;
1956 }
1957 state = newState;
1958 #ifdef DEBUG_ABORT
1959 if (newState != currState) {
1960 ndbout_c("%u: FORCE: Old state = %u, new state = %u, abort state = %u",
1961 id, currState, newState, abortState);
1962 }
1963 #endif
1964 }
1965
Table(ArrayPool<Fragment> & fh)1966 Backup::Table::Table(ArrayPool<Fragment> & fh)
1967 : fragments(fh)
1968 {
1969 triggerIds[0] = ILLEGAL_TRIGGER_ID;
1970 triggerIds[1] = ILLEGAL_TRIGGER_ID;
1971 triggerIds[2] = ILLEGAL_TRIGGER_ID;
1972 triggerAllocated[0] = false;
1973 triggerAllocated[1] = false;
1974 triggerAllocated[2] = false;
1975 }
1976
1977 /*****************************************************************************
1978 *
1979 * Node state handling
1980 *
1981 *****************************************************************************/
1982 void
execNODE_FAILREP(Signal * signal)1983 Backup::execNODE_FAILREP(Signal* signal)
1984 {
1985 jamEntry();
1986
1987 NodeFailRep * rep = (NodeFailRep*)signal->getDataPtr();
1988
1989 bool doStuff = false;
1990 /*
1991 Start by saving important signal data which will be destroyed before the
1992 process is completed.
1993 */
1994 NodeId new_master_node_id = rep->masterNodeId;
1995 Uint32 theFailedNodes[NdbNodeBitmask::Size];
1996 for (Uint32 i = 0; i < NdbNodeBitmask::Size; i++)
1997 theFailedNodes[i] = rep->theNodes[i];
1998
1999 c_masterNodeId = new_master_node_id;
2000
2001 NodePtr nodePtr;
2002 for(c_nodes.first(nodePtr); nodePtr.i != RNIL; c_nodes.next(nodePtr)) {
2003 jam();
2004 if(NdbNodeBitmask::get(theFailedNodes, nodePtr.p->nodeId)){
2005 if(nodePtr.p->alive){
2006 jam();
2007 ndbrequire(c_aliveNodes.get(nodePtr.p->nodeId));
2008 doStuff = true;
2009 } else {
2010 jam();
2011 ndbrequire(!c_aliveNodes.get(nodePtr.p->nodeId));
2012 }//if
2013 nodePtr.p->alive = 0;
2014 c_aliveNodes.clear(nodePtr.p->nodeId);
2015 }//if
2016 }//for
2017
2018 if(!doStuff){
2019 jam();
2020 return;
2021 }//if
2022
2023 #ifdef DEBUG_ABORT
2024 ndbout_c("****************** Node fail rep ******************");
2025 #endif
2026
2027 NodeId newCoordinator = c_masterNodeId;
2028 BackupRecordPtr ptr;
2029 for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr)) {
2030 jam();
2031 checkNodeFail(signal, ptr, newCoordinator, theFailedNodes);
2032 }
2033
2034 /* Block level cleanup */
2035 for(unsigned i = 1; i < MAX_NDB_NODES; i++) {
2036 jam();
2037 if(NdbNodeBitmask::get(theFailedNodes, i))
2038 {
2039 jam();
2040 Uint32 elementsCleaned = simBlockNodeFailure(signal, i); // No callback
2041 ndbassert(elementsCleaned == 0); // Backup should have no distributed frag signals
2042 (void) elementsCleaned; // Remove compiler warning
2043 }//if
2044 }//for
2045 }
2046
2047 bool
verifyNodesAlive(BackupRecordPtr ptr,const NdbNodeBitmask & aNodeBitMask)2048 Backup::verifyNodesAlive(BackupRecordPtr ptr,
2049 const NdbNodeBitmask& aNodeBitMask)
2050 {
2051 Uint32 version = getNodeInfo(getOwnNodeId()).m_version;
2052 for (Uint32 i = 0; i < MAX_NDB_NODES; i++) {
2053 jam();
2054 if(aNodeBitMask.get(i)) {
2055 if(!c_aliveNodes.get(i)){
2056 jam();
2057 ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail);
2058 return false;
2059 }//if
2060 if(getNodeInfo(i).m_version != version)
2061 {
2062 jam();
2063 ptr.p->setErrorCode(AbortBackupOrd::IncompatibleVersions);
2064 return false;
2065 }
2066 }//if
2067 }//for
2068 return true;
2069 }
2070
2071 void
checkNodeFail(Signal * signal,BackupRecordPtr ptr,NodeId newCoord,Uint32 theFailedNodes[NdbNodeBitmask::Size])2072 Backup::checkNodeFail(Signal* signal,
2073 BackupRecordPtr ptr,
2074 NodeId newCoord,
2075 Uint32 theFailedNodes[NdbNodeBitmask::Size])
2076 {
2077 NdbNodeBitmask mask;
2078 mask.assign(2, theFailedNodes);
2079
2080 /* Update ptr.p->nodes to be up to date with current alive nodes
2081 */
2082 NodePtr nodePtr;
2083 bool found = false;
2084 for(c_nodes.first(nodePtr); nodePtr.i != RNIL; c_nodes.next(nodePtr)) {
2085 jam();
2086 if(NdbNodeBitmask::get(theFailedNodes, nodePtr.p->nodeId)) {
2087 jam();
2088 if (ptr.p->nodes.get(nodePtr.p->nodeId)) {
2089 jam();
2090 ptr.p->nodes.clear(nodePtr.p->nodeId);
2091 found = true;
2092 }
2093 }//if
2094 }//for
2095
2096 if(!found) {
2097 jam();
2098 return; // failed node is not part of backup process, safe to continue
2099 }
2100
2101 if(mask.get(refToNode(ptr.p->masterRef)))
2102 {
2103 /**
2104 * Master died...abort
2105 */
2106 ptr.p->masterRef = reference();
2107 ptr.p->nodes.clear();
2108 ptr.p->nodes.set(getOwnNodeId());
2109 ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail);
2110 switch(ptr.p->m_gsn){
2111 case GSN_DEFINE_BACKUP_REQ:
2112 case GSN_START_BACKUP_REQ:
2113 case GSN_BACKUP_FRAGMENT_REQ:
2114 case GSN_STOP_BACKUP_REQ:
2115 // I'm currently processing...reply to self and abort...
2116 ptr.p->masterData.gsn = ptr.p->m_gsn;
2117 ptr.p->masterData.sendCounter = ptr.p->nodes;
2118 return;
2119 case GSN_DEFINE_BACKUP_REF:
2120 case GSN_DEFINE_BACKUP_CONF:
2121 case GSN_START_BACKUP_REF:
2122 case GSN_START_BACKUP_CONF:
2123 case GSN_BACKUP_FRAGMENT_REF:
2124 case GSN_BACKUP_FRAGMENT_CONF:
2125 case GSN_STOP_BACKUP_REF:
2126 case GSN_STOP_BACKUP_CONF:
2127 ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ;
2128 masterAbort(signal, ptr);
2129 return;
2130 case GSN_ABORT_BACKUP_ORD:
2131 // Already aborting
2132 return;
2133 }
2134 }
2135 else if (newCoord == getOwnNodeId())
2136 {
2137 /**
2138 * I'm master for this backup
2139 */
2140 jam();
2141 CRASH_INSERTION((10001));
2142 #ifdef DEBUG_ABORT
2143 ndbout_c("**** Master: Node failed: Master id = %u",
2144 refToNode(ptr.p->masterRef));
2145 #endif
2146
2147 Uint32 gsn, len, pos;
2148 ptr.p->nodes.bitANDC(mask);
2149 switch(ptr.p->masterData.gsn){
2150 case GSN_DEFINE_BACKUP_REQ:
2151 {
2152 DefineBackupRef * ref = (DefineBackupRef*)signal->getDataPtr();
2153 ref->backupPtr = ptr.i;
2154 ref->backupId = ptr.p->backupId;
2155 ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
2156 gsn= GSN_DEFINE_BACKUP_REF;
2157 len= DefineBackupRef::SignalLength;
2158 pos= Uint32(&ref->nodeId - signal->getDataPtr());
2159 break;
2160 }
2161 case GSN_START_BACKUP_REQ:
2162 {
2163 StartBackupRef * ref = (StartBackupRef*)signal->getDataPtr();
2164 ref->backupPtr = ptr.i;
2165 ref->backupId = ptr.p->backupId;
2166 ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
2167 gsn= GSN_START_BACKUP_REF;
2168 len= StartBackupRef::SignalLength;
2169 pos= Uint32(&ref->nodeId - signal->getDataPtr());
2170 break;
2171 }
2172 case GSN_BACKUP_FRAGMENT_REQ:
2173 {
2174 BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr();
2175 ref->backupPtr = ptr.i;
2176 ref->backupId = ptr.p->backupId;
2177 ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
2178 gsn= GSN_BACKUP_FRAGMENT_REF;
2179 len= BackupFragmentRef::SignalLength;
2180 pos= Uint32(&ref->nodeId - signal->getDataPtr());
2181 break;
2182 }
2183 case GSN_STOP_BACKUP_REQ:
2184 {
2185 StopBackupRef * ref = (StopBackupRef*)signal->getDataPtr();
2186 ref->backupPtr = ptr.i;
2187 ref->backupId = ptr.p->backupId;
2188 ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
2189 ref->nodeId = getOwnNodeId();
2190 gsn= GSN_STOP_BACKUP_REF;
2191 len= StopBackupRef::SignalLength;
2192 pos= Uint32(&ref->nodeId - signal->getDataPtr());
2193 break;
2194 }
2195 case GSN_WAIT_GCP_REQ:
2196 case GSN_DROP_TRIG_IMPL_REQ:
2197 case GSN_CREATE_TRIG_IMPL_REQ:
2198 case GSN_ALTER_TRIG_IMPL_REQ:
2199 ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail);
2200 return;
2201 case GSN_UTIL_SEQUENCE_REQ:
2202 case GSN_UTIL_LOCK_REQ:
2203 return;
2204 default:
2205 ndbrequire(false);
2206 }
2207
2208 for(Uint32 i = 0; (i = mask.find(i+1)) != NdbNodeBitmask::NotFound; )
2209 {
2210 signal->theData[pos] = i;
2211 sendSignal(reference(), gsn, signal, len, JBB);
2212 #ifdef DEBUG_ABORT
2213 ndbout_c("sending %d to self from %d", gsn, i);
2214 #endif
2215 }
2216 return;
2217 }//if
2218
2219 /**
2220 * I abort myself as slave if not master
2221 */
2222 CRASH_INSERTION((10021));
2223 }
2224
2225 void
execINCL_NODEREQ(Signal * signal)2226 Backup::execINCL_NODEREQ(Signal* signal)
2227 {
2228 jamEntry();
2229
2230 const Uint32 senderRef = signal->theData[0];
2231 const Uint32 inclNode = signal->theData[1];
2232
2233 NodePtr node;
2234 for(c_nodes.first(node); node.i != RNIL; c_nodes.next(node)) {
2235 jam();
2236 const Uint32 nodeId = node.p->nodeId;
2237 if(inclNode == nodeId){
2238 jam();
2239
2240 ndbrequire(node.p->alive == 0);
2241 ndbrequire(!c_aliveNodes.get(nodeId));
2242
2243 node.p->alive = 1;
2244 c_aliveNodes.set(nodeId);
2245
2246 break;
2247 }//if
2248 }//for
2249 signal->theData[0] = inclNode;
2250 signal->theData[1] = reference();
2251 sendSignal(senderRef, GSN_INCL_NODECONF, signal, 2, JBB);
2252 }
2253
2254 /*****************************************************************************
2255 *
 * Master functionality - Define backup
2257 *
2258 *****************************************************************************/
2259
2260 void
execBACKUP_REQ(Signal * signal)2261 Backup::execBACKUP_REQ(Signal* signal)
2262 {
2263 jamEntry();
2264 BackupReq * req = (BackupReq*)signal->getDataPtr();
2265
2266 const Uint32 senderData = req->senderData;
2267 const BlockReference senderRef = signal->senderBlockRef();
2268 const Uint32 dataLen32 = req->backupDataLen; // In 32 bit words
2269 const Uint32 flags = signal->getLength() > 2 ? req->flags : 2;
2270 const Uint32 input_backupId = signal->getLength() > 3 ? req->inputBackupId : 0;
2271
2272 if (getOwnNodeId() != getMasterNodeId())
2273 {
2274 jam();
2275 sendBackupRef(senderRef, flags, signal, senderData,
2276 BackupRef::IAmNotMaster);
2277 return;
2278 }//if
2279
2280 if (c_defaults.m_diskless)
2281 {
2282 jam();
2283 sendBackupRef(senderRef, flags, signal, senderData,
2284 BackupRef::CannotBackupDiskless);
2285 return;
2286 }
2287
2288 if (dataLen32 != 0)
2289 {
2290 jam();
2291 sendBackupRef(senderRef, flags, signal, senderData,
2292 BackupRef::BackupDefinitionNotImplemented);
2293 return;
2294 }//if
2295
2296 #ifdef DEBUG_ABORT
2297 dumpUsedResources();
2298 #endif
2299 /**
2300 * Seize a backup record
2301 */
2302 BackupRecordPtr ptr;
2303 c_backups.seizeFirst(ptr);
2304 if (ptr.i == RNIL)
2305 {
2306 jam();
2307 sendBackupRef(senderRef, flags, signal, senderData,
2308 BackupRef::OutOfBackupRecord);
2309 return;
2310 }//if
2311
2312 ndbrequire(ptr.p->tables.isEmpty());
2313
2314 ptr.p->m_gsn = 0;
2315 ptr.p->errorCode = 0;
2316 ptr.p->clientRef = senderRef;
2317 ptr.p->clientData = senderData;
2318 ptr.p->flags = flags;
2319 ptr.p->masterRef = reference();
2320 ptr.p->nodes = c_aliveNodes;
2321 if (input_backupId)
2322 {
2323 jam();
2324 ptr.p->backupId = input_backupId;
2325 }
2326 else
2327 {
2328 jam();
2329 ptr.p->backupId = 0;
2330 }
2331 ptr.p->backupKey[0] = 0;
2332 ptr.p->backupKey[1] = 0;
2333 ptr.p->backupDataLen = 0;
2334 ptr.p->masterData.errorCode = 0;
2335
2336 ptr.p->masterData.sequence.retriesLeft = 3;
2337 sendUtilSequenceReq(signal, ptr);
2338 }
2339
2340 void
sendUtilSequenceReq(Signal * signal,BackupRecordPtr ptr,Uint32 delay)2341 Backup::sendUtilSequenceReq(Signal* signal, BackupRecordPtr ptr, Uint32 delay)
2342 {
2343 jam();
2344
2345 UtilSequenceReq * utilReq = (UtilSequenceReq*)signal->getDataPtrSend();
2346 ptr.p->masterData.gsn = GSN_UTIL_SEQUENCE_REQ;
2347 utilReq->senderData = ptr.i;
2348 utilReq->sequenceId = NDB_BACKUP_SEQUENCE;
2349
2350 if (ptr.p->backupId)
2351 {
2352 jam();
2353 utilReq->requestType = UtilSequenceReq::SetVal;
2354 utilReq->value = ptr.p->backupId;
2355 }
2356 else
2357 {
2358 jam();
2359 utilReq->requestType = UtilSequenceReq::NextVal;
2360 }
2361
2362 if (delay == 0)
2363 {
2364 jam();
2365 sendSignal(DBUTIL_REF, GSN_UTIL_SEQUENCE_REQ,
2366 signal, UtilSequenceReq::SignalLength, JBB);
2367 }
2368 else
2369 {
2370 jam();
2371 sendSignalWithDelay(DBUTIL_REF, GSN_UTIL_SEQUENCE_REQ,
2372 signal, delay, UtilSequenceReq::SignalLength);
2373 }
2374 }
2375
2376 void
execUTIL_SEQUENCE_REF(Signal * signal)2377 Backup::execUTIL_SEQUENCE_REF(Signal* signal)
2378 {
2379 jamEntry();
2380 BackupRecordPtr ptr;
2381 UtilSequenceRef * utilRef = (UtilSequenceRef*)signal->getDataPtr();
2382 ptr.i = utilRef->senderData;
2383 c_backupPool.getPtr(ptr);
2384 ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ);
2385
2386 if (utilRef->errorCode == UtilSequenceRef::TCError)
2387 {
2388 jam();
2389 if (ptr.p->masterData.sequence.retriesLeft > 0)
2390 {
2391 jam();
2392 infoEvent("BACKUP: retrying sequence on error %u",
2393 utilRef->TCErrorCode);
2394 ptr.p->masterData.sequence.retriesLeft--;
2395 sendUtilSequenceReq(signal, ptr, 300);
2396 return;
2397 }
2398 }
2399 warningEvent("BACKUP: aborting due to sequence error (%u, %u)",
2400 utilRef->errorCode,
2401 utilRef->TCErrorCode);
2402
2403 sendBackupRef(signal, ptr, BackupRef::SequenceFailure);
2404 }//execUTIL_SEQUENCE_REF()
2405
2406 void
sendBackupRef(Signal * signal,BackupRecordPtr ptr,Uint32 errorCode)2407 Backup::sendBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errorCode)
2408 {
2409 jam();
2410 sendBackupRef(ptr.p->clientRef, ptr.p->flags, signal,
2411 ptr.p->clientData, errorCode);
2412 cleanup(signal, ptr);
2413 }
2414
2415 void
sendBackupRef(BlockReference senderRef,Uint32 flags,Signal * signal,Uint32 senderData,Uint32 errorCode)2416 Backup::sendBackupRef(BlockReference senderRef, Uint32 flags, Signal *signal,
2417 Uint32 senderData, Uint32 errorCode)
2418 {
2419 jam();
2420 if (SEND_BACKUP_STARTED_FLAG(flags))
2421 {
2422 jam();
2423 BackupRef* ref = (BackupRef*)signal->getDataPtrSend();
2424 ref->senderData = senderData;
2425 ref->errorCode = errorCode;
2426 ref->masterRef = numberToRef(BACKUP, getMasterNodeId());
2427 sendSignal(senderRef, GSN_BACKUP_REF, signal, BackupRef::SignalLength, JBB);
2428 }
2429
2430 if (errorCode != BackupRef::IAmNotMaster)
2431 {
2432 jam();
2433 signal->theData[0] = NDB_LE_BackupFailedToStart;
2434 signal->theData[1] = senderRef;
2435 signal->theData[2] = errorCode;
2436 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB);
2437 }
2438 }
2439
2440 void
execUTIL_SEQUENCE_CONF(Signal * signal)2441 Backup::execUTIL_SEQUENCE_CONF(Signal* signal)
2442 {
2443 jamEntry();
2444
2445 UtilSequenceConf * conf = (UtilSequenceConf*)signal->getDataPtr();
2446
2447 if(conf->requestType == UtilSequenceReq::Create)
2448 {
2449 jam();
2450 sendSTTORRY(signal); // At startup in NDB
2451 return;
2452 }
2453
2454 BackupRecordPtr ptr;
2455 ptr.i = conf->senderData;
2456 c_backupPool.getPtr(ptr);
2457
2458 ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ);
2459
2460 if (ptr.p->checkError())
2461 {
2462 jam();
2463 sendBackupRef(signal, ptr, ptr.p->errorCode);
2464 return;
2465 }//if
2466
2467 if (ERROR_INSERTED(10023))
2468 {
2469 sendBackupRef(signal, ptr, 323);
2470 return;
2471 }//if
2472
2473
2474 if(!ptr.p->backupId && conf->requestType != UtilSequenceReq::SetVal)
2475 {
2476 Uint64 backupId;
2477 memcpy(&backupId,conf->sequenceValue,8);
2478 ptr.p->backupId= (Uint32)backupId;
2479 }
2480
2481 ptr.p->backupKey[0] = (getOwnNodeId() << 16) | (ptr.p->backupId & 0xFFFF);
2482 ptr.p->backupKey[1] = Uint32(NdbTick_CurrentMillisecond());
2483
2484 ptr.p->masterData.gsn = GSN_UTIL_LOCK_REQ;
2485 Mutex mutex(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex);
2486 Callback c = { safe_cast(&Backup::defineBackupMutex_locked), ptr.i };
2487 ndbrequire(mutex.lock(c));
2488
2489 return;
2490 }
2491
2492 void
defineBackupMutex_locked(Signal * signal,Uint32 ptrI,Uint32 retVal)2493 Backup::defineBackupMutex_locked(Signal* signal, Uint32 ptrI, Uint32 retVal){
2494 jamEntry();
2495 ndbrequire(retVal == 0);
2496
2497 BackupRecordPtr ptr;
2498 ptr.i = ptrI;
2499 c_backupPool.getPtr(ptr);
2500
2501 ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ);
2502
2503 ptr.p->masterData.gsn = GSN_UTIL_LOCK_REQ;
2504 Mutex mutex(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex);
2505 Callback c = { safe_cast(&Backup::dictCommitTableMutex_locked), ptr.i };
2506 ndbrequire(mutex.lock(c));
2507 }
2508
2509 void
dictCommitTableMutex_locked(Signal * signal,Uint32 ptrI,Uint32 retVal)2510 Backup::dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal)
2511 {
2512 jamEntry();
2513 ndbrequire(retVal == 0);
2514
2515 /**
2516 * We now have both the mutexes
2517 */
2518 BackupRecordPtr ptr;
2519 ptr.i = ptrI;
2520 c_backupPool.getPtr(ptr);
2521
2522 ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ);
2523
2524 if (ERROR_INSERTED(10031)) {
2525 ptr.p->setErrorCode(331);
2526 }//if
2527
2528 if (ptr.p->checkError())
2529 {
2530 jam();
2531
2532 /**
2533 * Unlock mutexes
2534 */
2535 jam();
2536 Mutex mutex1(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex);
2537 jam();
2538 mutex1.unlock(); // ignore response
2539
2540 jam();
2541 Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex);
2542 jam();
2543 mutex2.unlock(); // ignore response
2544
2545 sendBackupRef(signal, ptr, ptr.p->errorCode);
2546 return;
2547 }//if
2548
2549 sendDefineBackupReq(signal, ptr);
2550 }
2551
2552 /*****************************************************************************
2553 *
 * Master functionality - Define backup cont'd (from now on all slaves are in)
2555 *
2556 *****************************************************************************/
2557
2558 bool
haveAllSignals(BackupRecordPtr ptr,Uint32 gsn,Uint32 nodeId)2559 Backup::haveAllSignals(BackupRecordPtr ptr, Uint32 gsn, Uint32 nodeId)
2560 {
2561 ndbrequire(ptr.p->masterRef == reference());
2562 ndbrequire(ptr.p->masterData.gsn == gsn);
2563 ndbrequire(!ptr.p->masterData.sendCounter.done());
2564 ndbrequire(ptr.p->masterData.sendCounter.isWaitingFor(nodeId));
2565
2566 ptr.p->masterData.sendCounter.clearWaitingFor(nodeId);
2567 return ptr.p->masterData.sendCounter.done();
2568 }
2569
2570 void
sendDefineBackupReq(Signal * signal,BackupRecordPtr ptr)2571 Backup::sendDefineBackupReq(Signal *signal, BackupRecordPtr ptr)
2572 {
2573 /**
2574 * Sending define backup to all participants
2575 */
2576 DefineBackupReq * req = (DefineBackupReq*)signal->getDataPtrSend();
2577 req->backupId = ptr.p->backupId;
2578 req->clientRef = ptr.p->clientRef;
2579 req->clientData = ptr.p->clientData;
2580 req->senderRef = reference();
2581 req->backupPtr = ptr.i;
2582 req->backupKey[0] = ptr.p->backupKey[0];
2583 req->backupKey[1] = ptr.p->backupKey[1];
2584 req->nodes = ptr.p->nodes;
2585 req->backupDataLen = ptr.p->backupDataLen;
2586 req->flags = ptr.p->flags;
2587
2588 ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ;
2589 ptr.p->masterData.sendCounter = ptr.p->nodes;
2590 BlockNumber backupBlockNo = numberToBlock(BACKUP, instanceKey(ptr));
2591 NodeReceiverGroup rg(backupBlockNo, ptr.p->nodes);
2592 sendSignal(rg, GSN_DEFINE_BACKUP_REQ, signal,
2593 DefineBackupReq::SignalLength, JBB);
2594
2595 /**
2596 * Now send backup data
2597 */
2598 const Uint32 len = ptr.p->backupDataLen;
2599 if(len == 0){
2600 /**
2601 * No data to send
2602 */
2603 jam();
2604 return;
2605 }//if
2606
2607 /**
2608 * Not implemented
2609 */
2610 ndbrequire(0);
2611 }
2612
2613 void
execDEFINE_BACKUP_REF(Signal * signal)2614 Backup::execDEFINE_BACKUP_REF(Signal* signal)
2615 {
2616 jamEntry();
2617
2618 DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtr();
2619
2620 const Uint32 ptrI = ref->backupPtr;
2621 //const Uint32 backupId = ref->backupId;
2622 const Uint32 nodeId = ref->nodeId;
2623
2624 BackupRecordPtr ptr;
2625 c_backupPool.getPtr(ptr, ptrI);
2626
2627 ptr.p->setErrorCode(ref->errorCode);
2628 defineBackupReply(signal, ptr, nodeId);
2629 }
2630
2631 void
execDEFINE_BACKUP_CONF(Signal * signal)2632 Backup::execDEFINE_BACKUP_CONF(Signal* signal)
2633 {
2634 jamEntry();
2635
2636 DefineBackupConf* conf = (DefineBackupConf*)signal->getDataPtr();
2637 const Uint32 ptrI = conf->backupPtr;
2638 //const Uint32 backupId = conf->backupId;
2639 const Uint32 nodeId = refToNode(signal->senderBlockRef());
2640
2641 BackupRecordPtr ptr;
2642 c_backupPool.getPtr(ptr, ptrI);
2643
2644 if (ERROR_INSERTED(10024))
2645 {
2646 ptr.p->setErrorCode(324);
2647 }
2648
2649 defineBackupReply(signal, ptr, nodeId);
2650 }
2651
2652 void
defineBackupReply(Signal * signal,BackupRecordPtr ptr,Uint32 nodeId)2653 Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId)
2654 {
2655 if (!haveAllSignals(ptr, GSN_DEFINE_BACKUP_REQ, nodeId)) {
2656 jam();
2657 return;
2658 }
2659
2660 /**
2661 * Unlock mutexes
2662 */
2663 jam();
2664 Mutex mutex1(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex);
2665 jam();
2666 mutex1.unlock(); // ignore response
2667
2668 jam();
2669 Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex);
2670 jam();
2671 mutex2.unlock(); // ignore response
2672
2673 if(ptr.p->checkError())
2674 {
2675 jam();
2676 masterAbort(signal, ptr);
2677 return;
2678 }
2679
2680 CRASH_INSERTION((10034));
2681
2682 /**
2683 * We've received GSN_DEFINE_BACKUP_CONF from all participants.
2684 *
2685 * Our next step is to send START_BACKUP_REQ to all participants,
2686 * who will then send CREATE_TRIG_REQ for all tables to their local
2687 * DBTUP.
2688 */
2689 TablePtr tabPtr;
2690 ptr.p->tables.first(tabPtr);
2691
2692 sendStartBackup(signal, ptr, tabPtr);
2693 }
2694
2695 /*****************************************************************************
2696 *
 * Master functionality - Prepare triggers
2698 *
2699 *****************************************************************************/
2700 void
createAttributeMask(TablePtr tabPtr,Bitmask<MAXNROFATTRIBUTESINWORDS> & mask)2701 Backup::createAttributeMask(TablePtr tabPtr,
2702 Bitmask<MAXNROFATTRIBUTESINWORDS> & mask)
2703 {
2704 mask.clear();
2705 for (Uint32 i = 0; i<tabPtr.p->noOfAttributes; i++)
2706 mask.set(i);
2707 }
2708
2709 void
sendCreateTrig(Signal * signal,BackupRecordPtr ptr,TablePtr tabPtr)2710 Backup::sendCreateTrig(Signal* signal,
2711 BackupRecordPtr ptr, TablePtr tabPtr)
2712 {
2713 CreateTrigImplReq* req = (CreateTrigImplReq*)signal->getDataPtrSend();
2714
2715 /*
2716 * First, setup the structures
2717 */
2718 for(Uint32 j=0; j<3; j++) {
2719 jam();
2720
2721 TriggerPtr trigPtr;
2722 if (!ptr.p->triggers.seizeFirst(trigPtr)) {
2723 jam();
2724 ptr.p->m_gsn = GSN_START_BACKUP_REF;
2725 StartBackupRef* ref = (StartBackupRef*)signal->getDataPtrSend();
2726 ref->backupPtr = ptr.i;
2727 ref->backupId = ptr.p->backupId;
2728 ref->errorCode = StartBackupRef::FailedToAllocateTriggerRecord;
2729 ref->nodeId = getOwnNodeId();
2730 sendSignal(ptr.p->masterRef, GSN_START_BACKUP_REF, signal,
2731 StartBackupRef::SignalLength, JBB);
2732 return;
2733 } // if
2734
2735 const Uint32 triggerId= trigPtr.i;
2736 tabPtr.p->triggerIds[j] = triggerId;
2737 tabPtr.p->triggerAllocated[j] = true;
2738 trigPtr.p->backupPtr = ptr.i;
2739 trigPtr.p->tableId = tabPtr.p->tableId;
2740 trigPtr.p->tab_ptr_i = tabPtr.i;
2741 trigPtr.p->logEntry = 0;
2742 trigPtr.p->event = j;
2743 trigPtr.p->maxRecordSize = 4096;
2744 trigPtr.p->operation =
2745 &ptr.p->files.getPtr(ptr.p->logFilePtr)->operation;
2746 trigPtr.p->operation->noOfBytes = 0;
2747 trigPtr.p->operation->noOfRecords = 0;
2748 trigPtr.p->errorCode = 0;
2749 } // for
2750
2751 /*
2752 * now ask DBTUP to create
2753 */
2754 ptr.p->slaveData.gsn = GSN_CREATE_TRIG_IMPL_REQ;
2755 ptr.p->slaveData.trigSendCounter = 3;
2756 ptr.p->slaveData.createTrig.tableId = tabPtr.p->tableId;
2757
2758 req->senderRef = reference();
2759 req->receiverRef = reference();
2760 req->senderData = ptr.i;
2761 req->requestType = 0;
2762
2763 Bitmask<MAXNROFATTRIBUTESINWORDS> attrMask;
2764 createAttributeMask(tabPtr, attrMask);
2765
2766 req->tableId = tabPtr.p->tableId;
2767 req->tableVersion = 0;
2768 req->indexId = RNIL;
2769 req->indexVersion = 0;
2770
2771 Uint32 ti = 0;
2772 /*
2773 * We always send PK for any operations and any triggertypes.
2774 * For SUBSCRIPTION_BEFORE
2775 * We send after image for INSERT.
2776 * We send before image for DELETE.
2777 * We send before+after image for UPDATE.
2778 * For SUBSCRIPTION
2779 * We send after image for INSERT.
2780 * We send only PK for DELETE.
2781 * We send after image for UPDATE.
2782 */
2783 if(ptr.p->flags & BackupReq::USE_UNDO_LOG)
2784 TriggerInfo::setTriggerType(ti, TriggerType::SUBSCRIPTION_BEFORE);
2785 else
2786 TriggerInfo::setTriggerType(ti, TriggerType::SUBSCRIPTION);
2787 TriggerInfo::setTriggerActionTime(ti, TriggerActionTime::TA_DETACHED);
2788 TriggerInfo::setMonitorReplicas(ti, true);
2789 TriggerInfo::setMonitorAllAttributes(ti, false);
2790
2791 for (int i=0; i < 3; i++) {
2792 req->triggerId = tabPtr.p->triggerIds[i];
2793
2794 Uint32 ti2 = ti;
2795 TriggerInfo::setTriggerEvent(ti2, triggerEventValues[i]);
2796 req->triggerInfo = ti2;
2797
2798 LinearSectionPtr ptr[3];
2799 ptr[0].p = attrMask.rep.data;
2800 ptr[0].sz = attrMask.getSizeInWords();
2801
2802 sendSignal(DBTUP_REF, GSN_CREATE_TRIG_IMPL_REQ,
2803 signal, CreateTrigImplReq::SignalLength, JBB, ptr ,1);
2804 }
2805 }
2806
2807 void
execCREATE_TRIG_IMPL_CONF(Signal * signal)2808 Backup::execCREATE_TRIG_IMPL_CONF(Signal* signal)
2809 {
2810 jamEntry();
2811 const CreateTrigImplConf* conf =
2812 (const CreateTrigImplConf*)signal->getDataPtr();
2813
2814 const Uint32 ptrI = conf->senderData;
2815 const Uint32 tableId = conf->tableId;
2816 const TriggerEvent::Value type =
2817 TriggerInfo::getTriggerEvent(conf->triggerInfo);
2818
2819 BackupRecordPtr ptr;
2820 c_backupPool.getPtr(ptr, ptrI);
2821
2822 /**
2823 * Verify that I'm waiting for this conf
2824 *
2825 * ptr.p->masterRef != reference()
2826 * as slaves and masters have triggers now.
2827 */
2828 ndbrequire(ptr.p->slaveData.gsn == GSN_CREATE_TRIG_IMPL_REQ);
2829 ndbrequire(ptr.p->slaveData.trigSendCounter.done() == false);
2830 ndbrequire(ptr.p->slaveData.createTrig.tableId == tableId);
2831
2832 TablePtr tabPtr;
2833 ndbrequire(findTable(ptr, tabPtr, tableId));
2834 ndbrequire(type < 3); // if some decides to change the enums
2835
2836 createTrigReply(signal, ptr);
2837 }
2838
2839 void
execCREATE_TRIG_IMPL_REF(Signal * signal)2840 Backup::execCREATE_TRIG_IMPL_REF(Signal* signal)
2841 {
2842 jamEntry();
2843 const CreateTrigImplRef* ref =
2844 (const CreateTrigImplRef*)signal->getDataPtr();
2845
2846 const Uint32 ptrI = ref->senderData;
2847 const Uint32 tableId = ref->tableId;
2848
2849 BackupRecordPtr ptr;
2850 c_backupPool.getPtr(ptr, ptrI);
2851
2852 /**
2853 * Verify that I'm waiting for this ref
2854 *
2855 * ptr.p->masterRef != reference()
2856 * as slaves and masters have triggers now
2857 */
2858 ndbrequire(ptr.p->slaveData.gsn == GSN_CREATE_TRIG_IMPL_REQ);
2859 ndbrequire(ptr.p->slaveData.trigSendCounter.done() == false);
2860 ndbrequire(ptr.p->slaveData.createTrig.tableId == tableId);
2861
2862 ptr.p->setErrorCode(ref->errorCode);
2863
2864 createTrigReply(signal, ptr);
2865 }
2866
2867 void
createTrigReply(Signal * signal,BackupRecordPtr ptr)2868 Backup::createTrigReply(Signal* signal, BackupRecordPtr ptr)
2869 {
2870 CRASH_INSERTION(10003);
2871
2872 /**
2873 * Check finished with table
2874 */
2875 ptr.p->slaveData.trigSendCounter--;
2876 if(ptr.p->slaveData.trigSendCounter.done() == false){
2877 jam();
2878 return;
2879 }//if
2880
2881 if (ERROR_INSERTED(10025))
2882 {
2883 ptr.p->errorCode = 325;
2884 }
2885
2886 if(ptr.p->checkError()) {
2887 jam();
2888 ptr.p->m_gsn = GSN_START_BACKUP_REF;
2889 StartBackupRef* ref = (StartBackupRef*)signal->getDataPtrSend();
2890 ref->backupPtr = ptr.i;
2891 ref->backupId = ptr.p->backupId;
2892 ref->errorCode = ptr.p->errorCode;
2893 ref->nodeId = getOwnNodeId();
2894 sendSignal(ptr.p->masterRef, GSN_START_BACKUP_REF, signal,
2895 StartBackupRef::SignalLength, JBB);
2896 return;
2897 }//if
2898
2899 TablePtr tabPtr;
2900 ndbrequire(findTable(ptr, tabPtr, ptr.p->slaveData.createTrig.tableId));
2901
2902 /**
2903 * Next table
2904 */
2905 ptr.p->tables.next(tabPtr);
2906 if(tabPtr.i != RNIL){
2907 jam();
2908 sendCreateTrig(signal, ptr, tabPtr);
2909 return;
2910 }//if
2911
2912 /**
2913 * We've finished creating triggers.
2914 *
2915 * send conf and wait
2916 */
2917 ptr.p->m_gsn = GSN_START_BACKUP_CONF;
2918 StartBackupConf* conf = (StartBackupConf*)signal->getDataPtrSend();
2919 conf->backupPtr = ptr.i;
2920 conf->backupId = ptr.p->backupId;
2921 sendSignal(ptr.p->masterRef, GSN_START_BACKUP_CONF, signal,
2922 StartBackupConf::SignalLength, JBB);
2923 }
2924
2925 /*****************************************************************************
2926 *
 * Master functionality - Start backup
2928 *
2929 *****************************************************************************/
2930 void
sendStartBackup(Signal * signal,BackupRecordPtr ptr,TablePtr tabPtr)2931 Backup::sendStartBackup(Signal* signal, BackupRecordPtr ptr, TablePtr tabPtr)
2932 {
2933
2934 ptr.p->masterData.startBackup.tablePtr = tabPtr.i;
2935
2936 StartBackupReq* req = (StartBackupReq*)signal->getDataPtrSend();
2937 req->backupId = ptr.p->backupId;
2938 req->backupPtr = ptr.i;
2939
2940 /**
2941 * We use trigger Ids that are unique to BACKUP.
2942 * These don't interfere with other triggers (e.g. from DBDICT)
2943 * as there is a special case in DBTUP.
2944 *
2945 * Consequently, backups during online upgrade won't work
2946 */
2947 ptr.p->masterData.gsn = GSN_START_BACKUP_REQ;
2948 ptr.p->masterData.sendCounter = ptr.p->nodes;
2949 BlockNumber backupBlockNo = numberToBlock(BACKUP, instanceKey(ptr));
2950 NodeReceiverGroup rg(backupBlockNo, ptr.p->nodes);
2951 sendSignal(rg, GSN_START_BACKUP_REQ, signal,
2952 StartBackupReq::SignalLength, JBB);
2953 }
2954
2955 void
execSTART_BACKUP_REF(Signal * signal)2956 Backup::execSTART_BACKUP_REF(Signal* signal)
2957 {
2958 jamEntry();
2959
2960 StartBackupRef* ref = (StartBackupRef*)signal->getDataPtr();
2961 const Uint32 ptrI = ref->backupPtr;
2962 //const Uint32 backupId = ref->backupId;
2963 const Uint32 nodeId = ref->nodeId;
2964
2965 BackupRecordPtr ptr;
2966 c_backupPool.getPtr(ptr, ptrI);
2967
2968 ptr.p->setErrorCode(ref->errorCode);
2969 startBackupReply(signal, ptr, nodeId);
2970 }
2971
2972 void
execSTART_BACKUP_CONF(Signal * signal)2973 Backup::execSTART_BACKUP_CONF(Signal* signal)
2974 {
2975 jamEntry();
2976
2977 StartBackupConf* conf = (StartBackupConf*)signal->getDataPtr();
2978 const Uint32 ptrI = conf->backupPtr;
2979 //const Uint32 backupId = conf->backupId;
2980 const Uint32 nodeId = refToNode(signal->senderBlockRef());
2981
2982 BackupRecordPtr ptr;
2983 c_backupPool.getPtr(ptr, ptrI);
2984
2985 startBackupReply(signal, ptr, nodeId);
2986 }
2987
2988 void
startBackupReply(Signal * signal,BackupRecordPtr ptr,Uint32 nodeId)2989 Backup::startBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId)
2990 {
2991
2992 CRASH_INSERTION((10004));
2993
2994 if (!haveAllSignals(ptr, GSN_START_BACKUP_REQ, nodeId)) {
2995 jam();
2996 return;
2997 }
2998
2999 if (ERROR_INSERTED(10026))
3000 {
3001 ptr.p->errorCode = 326;
3002 }
3003
3004 if(ptr.p->checkError()){
3005 jam();
3006 masterAbort(signal, ptr);
3007 return;
3008 }
3009
3010 /*
3011 * We reply to client after create trigger
3012 */
3013 if (SEND_BACKUP_STARTED_FLAG(ptr.p->flags))
3014 {
3015 BackupConf * conf = (BackupConf*)signal->getDataPtrSend();
3016 conf->backupId = ptr.p->backupId;
3017 conf->senderData = ptr.p->clientData;
3018 conf->nodes = ptr.p->nodes;
3019 sendSignal(ptr.p->clientRef, GSN_BACKUP_CONF, signal,
3020 BackupConf::SignalLength, JBB);
3021 }
3022
3023 signal->theData[0] = NDB_LE_BackupStarted;
3024 signal->theData[1] = ptr.p->clientRef;
3025 signal->theData[2] = ptr.p->backupId;
3026 ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+3);
3027 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3+NdbNodeBitmask::Size, JBB);
3028
3029 /**
3030 * Wait for GCP
3031 */
3032 ptr.p->masterData.gsn = GSN_WAIT_GCP_REQ;
3033 ptr.p->masterData.waitGCP.startBackup = true;
3034
3035 WaitGCPReq * waitGCPReq = (WaitGCPReq*)signal->getDataPtrSend();
3036 waitGCPReq->senderRef = reference();
3037 waitGCPReq->senderData = ptr.i;
3038 waitGCPReq->requestType = WaitGCPReq::CompleteForceStart;
3039 //we delay 10 seconds for testcases to generate events to be recorded in the UNDO log
3040 if (ERROR_INSERTED(10041))
3041 {
3042 sendSignalWithDelay(DBDIH_REF, GSN_WAIT_GCP_REQ, signal, 10*1000, WaitGCPReq::SignalLength);
3043 }
3044 else
3045 sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
3046 WaitGCPReq::SignalLength,JBB);
3047 }
3048
3049 void
execWAIT_GCP_REF(Signal * signal)3050 Backup::execWAIT_GCP_REF(Signal* signal)
3051 {
3052 jamEntry();
3053
3054 CRASH_INSERTION((10006));
3055
3056 WaitGCPRef * ref = (WaitGCPRef*)signal->getDataPtr();
3057 const Uint32 ptrI = ref->senderData;
3058
3059 BackupRecordPtr ptr;
3060 c_backupPool.getPtr(ptr, ptrI);
3061
3062 ndbrequire(ptr.p->masterRef == reference());
3063 ndbrequire(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ);
3064
3065 WaitGCPReq * req = (WaitGCPReq*)signal->getDataPtrSend();
3066 req->senderRef = reference();
3067 req->senderData = ptr.i;
3068 req->requestType = WaitGCPReq::CompleteForceStart;
3069 sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
3070 WaitGCPReq::SignalLength,JBB);
3071 }
3072
3073 void
execWAIT_GCP_CONF(Signal * signal)3074 Backup::execWAIT_GCP_CONF(Signal* signal){
3075 jamEntry();
3076
3077 CRASH_INSERTION((10007));
3078
3079 WaitGCPConf * conf = (WaitGCPConf*)signal->getDataPtr();
3080 const Uint32 ptrI = conf->senderData;
3081 const Uint32 gcp = conf->gci_hi;
3082
3083 BackupRecordPtr ptr;
3084 c_backupPool.getPtr(ptr, ptrI);
3085
3086 ndbrequire(ptr.p->masterRef == reference());
3087 ndbrequire(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ);
3088
3089 if(ptr.p->checkError()) {
3090 jam();
3091 masterAbort(signal, ptr);
3092 return;
3093 }//if
3094
3095 if(ptr.p->masterData.waitGCP.startBackup) {
3096 jam();
3097 CRASH_INSERTION((10008));
3098 ptr.p->startGCP = gcp;
3099 ptr.p->masterData.sendCounter= 0;
3100 ptr.p->masterData.gsn = GSN_BACKUP_FRAGMENT_REQ;
3101 nextFragment(signal, ptr);
3102 return;
3103 } else {
3104 jam();
3105 if(gcp >= ptr.p->startGCP + 3)
3106 {
3107 CRASH_INSERTION((10009));
3108 ptr.p->stopGCP = gcp;
3109 /**
3110 * Backup is complete - begin cleanup
3111 * STOP_BACKUP_REQ is sent to participants.
3112 * They then drop the local triggers
3113 */
3114 sendStopBackup(signal, ptr);
3115 return;
3116 }//if
3117
3118 /**
3119 * Make sure that we got entire stopGCP
3120 */
3121 WaitGCPReq * req = (WaitGCPReq*)signal->getDataPtrSend();
3122 req->senderRef = reference();
3123 req->senderData = ptr.i;
3124 req->requestType = WaitGCPReq::CompleteForceStart;
3125 sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
3126 WaitGCPReq::SignalLength,JBB);
3127 return;
3128 }
3129 }
3130
/*****************************************************************************
 *
 * Master functionality - Backup fragment
 *
 *****************************************************************************/
3136 void
nextFragment(Signal * signal,BackupRecordPtr ptr)3137 Backup::nextFragment(Signal* signal, BackupRecordPtr ptr)
3138 {
3139 jam();
3140
3141 BackupFragmentReq* req = (BackupFragmentReq*)signal->getDataPtrSend();
3142 req->backupPtr = ptr.i;
3143 req->backupId = ptr.p->backupId;
3144
3145 NdbNodeBitmask nodes = ptr.p->nodes;
3146 Uint32 idleNodes = nodes.count();
3147 Uint32 saveIdleNodes = idleNodes;
3148 ndbrequire(idleNodes > 0);
3149
3150 TablePtr tabPtr;
3151 ptr.p->tables.first(tabPtr);
3152 for(; tabPtr.i != RNIL && idleNodes > 0; ptr.p->tables.next(tabPtr)) {
3153 jam();
3154 FragmentPtr fragPtr;
3155 Array<Fragment> & frags = tabPtr.p->fragments;
3156 const Uint32 fragCount = frags.getSize();
3157
3158 for(Uint32 i = 0; i<fragCount && idleNodes > 0; i++) {
3159 jam();
3160 tabPtr.p->fragments.getPtr(fragPtr, i);
3161 const Uint32 nodeId = fragPtr.p->node;
3162 if(fragPtr.p->scanning != 0) {
3163 jam();
3164 ndbrequire(nodes.get(nodeId));
3165 nodes.clear(nodeId);
3166 idleNodes--;
3167 } else if(fragPtr.p->scanned == 0 && nodes.get(nodeId)){
3168 jam();
3169 fragPtr.p->scanning = 1;
3170 nodes.clear(nodeId);
3171 idleNodes--;
3172
3173 req->tableId = tabPtr.p->tableId;
3174 req->fragmentNo = i;
3175 req->count = 0;
3176
3177 ptr.p->masterData.sendCounter++;
3178 BlockReference ref = numberToRef(BACKUP, instanceKey(ptr), nodeId);
3179 sendSignal(ref, GSN_BACKUP_FRAGMENT_REQ, signal,
3180 BackupFragmentReq::SignalLength, JBB);
3181 }//if
3182 }//for
3183 }//for
3184
3185 if(idleNodes != saveIdleNodes){
3186 jam();
3187 return;
3188 }//if
3189
3190 /**
3191 * Finished with all tables
3192 */
3193 {
3194 ptr.p->masterData.gsn = GSN_WAIT_GCP_REQ;
3195 ptr.p->masterData.waitGCP.startBackup = false;
3196
3197 WaitGCPReq * req = (WaitGCPReq*)signal->getDataPtrSend();
3198 req->senderRef = reference();
3199 req->senderData = ptr.i;
3200 req->requestType = WaitGCPReq::CompleteForceStart;
3201 sendSignal(DBDIH_REF, GSN_WAIT_GCP_REQ, signal,
3202 WaitGCPReq::SignalLength, JBB);
3203 }
3204 }
3205
3206 void
execBACKUP_FRAGMENT_CONF(Signal * signal)3207 Backup::execBACKUP_FRAGMENT_CONF(Signal* signal)
3208 {
3209 jamEntry();
3210
3211 CRASH_INSERTION((10010));
3212
3213 BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtr();
3214 const Uint32 ptrI = conf->backupPtr;
3215 //const Uint32 backupId = conf->backupId;
3216 const Uint32 tableId = conf->tableId;
3217 const Uint32 fragmentNo = conf->fragmentNo;
3218 const Uint32 nodeId = refToNode(signal->senderBlockRef());
3219 const Uint64 noOfBytes =
3220 conf->noOfBytesLow + (((Uint64)conf->noOfBytesHigh) << 32);
3221 const Uint64 noOfRecords =
3222 conf->noOfRecordsLow + (((Uint64)conf->noOfRecordsHigh) << 32);
3223
3224 BackupRecordPtr ptr;
3225 c_backupPool.getPtr(ptr, ptrI);
3226
3227 ptr.p->noOfBytes += noOfBytes;
3228 ptr.p->noOfRecords += noOfRecords;
3229 ptr.p->masterData.sendCounter--;
3230
3231 TablePtr tabPtr;
3232 ndbrequire(findTable(ptr, tabPtr, tableId));
3233
3234 tabPtr.p->noOfRecords += noOfRecords;
3235
3236 FragmentPtr fragPtr;
3237 tabPtr.p->fragments.getPtr(fragPtr, fragmentNo);
3238
3239 fragPtr.p->noOfRecords = noOfRecords;
3240
3241 ndbrequire(fragPtr.p->scanned == 0);
3242 ndbrequire(fragPtr.p->scanning == 1);
3243 ndbrequire(fragPtr.p->node == nodeId);
3244
3245 fragPtr.p->scanned = 1;
3246 fragPtr.p->scanning = 0;
3247
3248 if (ERROR_INSERTED(10028))
3249 {
3250 ptr.p->errorCode = 328;
3251 }
3252
3253 if(ptr.p->checkError())
3254 {
3255 if(ptr.p->masterData.sendCounter.done())
3256 {
3257 jam();
3258 masterAbort(signal, ptr);
3259 return;
3260 }//if
3261 }
3262 else
3263 {
3264 NdbNodeBitmask nodes = ptr.p->nodes;
3265 nodes.clear(getOwnNodeId());
3266 if (!nodes.isclear())
3267 {
3268 BackupFragmentCompleteRep *rep =
3269 (BackupFragmentCompleteRep*)signal->getDataPtrSend();
3270 rep->backupId = ptr.p->backupId;
3271 rep->backupPtr = ptr.i;
3272 rep->tableId = tableId;
3273 rep->fragmentNo = fragmentNo;
3274 rep->noOfTableRowsLow = (Uint32)(tabPtr.p->noOfRecords & 0xFFFFFFFF);
3275 rep->noOfTableRowsHigh = (Uint32)(tabPtr.p->noOfRecords >> 32);
3276 rep->noOfFragmentRowsLow = (Uint32)(noOfRecords & 0xFFFFFFFF);
3277 rep->noOfFragmentRowsHigh = (Uint32)(noOfRecords >> 32);
3278 BlockNumber backupBlockNo = numberToBlock(BACKUP, instanceKey(ptr));
3279 NodeReceiverGroup rg(backupBlockNo, ptr.p->nodes);
3280 sendSignal(rg, GSN_BACKUP_FRAGMENT_COMPLETE_REP, signal,
3281 BackupFragmentCompleteRep::SignalLength, JBA);
3282 }
3283 nextFragment(signal, ptr);
3284 }
3285 }
3286
3287 void
execBACKUP_FRAGMENT_REF(Signal * signal)3288 Backup::execBACKUP_FRAGMENT_REF(Signal* signal)
3289 {
3290 jamEntry();
3291
3292 CRASH_INSERTION((10011));
3293
3294 BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr();
3295 const Uint32 ptrI = ref->backupPtr;
3296 //const Uint32 backupId = ref->backupId;
3297 const Uint32 nodeId = ref->nodeId;
3298
3299 BackupRecordPtr ptr;
3300 c_backupPool.getPtr(ptr, ptrI);
3301
3302 TablePtr tabPtr;
3303 ptr.p->tables.first(tabPtr);
3304 for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) {
3305 jam();
3306 FragmentPtr fragPtr;
3307 Array<Fragment> & frags = tabPtr.p->fragments;
3308 const Uint32 fragCount = frags.getSize();
3309
3310 for(Uint32 i = 0; i<fragCount; i++) {
3311 jam();
3312 tabPtr.p->fragments.getPtr(fragPtr, i);
3313 if(fragPtr.p->scanning != 0 && nodeId == fragPtr.p->node)
3314 {
3315 jam();
3316 ndbrequire(fragPtr.p->scanned == 0);
3317 fragPtr.p->scanned = 1;
3318 fragPtr.p->scanning = 0;
3319 goto done;
3320 }
3321 }
3322 }
3323 goto err;
3324
3325 done:
3326 ptr.p->masterData.sendCounter--;
3327 ptr.p->setErrorCode(ref->errorCode);
3328
3329 if(ptr.p->masterData.sendCounter.done())
3330 {
3331 jam();
3332 masterAbort(signal, ptr);
3333 return;
3334 }//if
3335
3336 err:
3337 AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
3338 ord->backupId = ptr.p->backupId;
3339 ord->backupPtr = ptr.i;
3340 ord->requestType = AbortBackupOrd::LogBufferFull;
3341 ord->senderData= ptr.i;
3342 execABORT_BACKUP_ORD(signal);
3343 }
3344
3345 void
execBACKUP_FRAGMENT_COMPLETE_REP(Signal * signal)3346 Backup::execBACKUP_FRAGMENT_COMPLETE_REP(Signal* signal)
3347 {
3348 jamEntry();
3349 BackupFragmentCompleteRep * rep =
3350 (BackupFragmentCompleteRep*)signal->getDataPtr();
3351
3352 BackupRecordPtr ptr;
3353 c_backupPool.getPtr(ptr, rep->backupPtr);
3354
3355 TablePtr tabPtr;
3356 ndbrequire(findTable(ptr, tabPtr, rep->tableId));
3357
3358 tabPtr.p->noOfRecords =
3359 rep->noOfTableRowsLow + (((Uint64)rep->noOfTableRowsHigh) << 32);
3360
3361 FragmentPtr fragPtr;
3362 tabPtr.p->fragments.getPtr(fragPtr, rep->fragmentNo);
3363
3364 fragPtr.p->noOfRecords =
3365 rep->noOfFragmentRowsLow + (((Uint64)rep->noOfFragmentRowsHigh) << 32);
3366 }
3367
/*****************************************************************************
 *
 * Slave functionality - Drop triggers
 *
 *****************************************************************************/
3373
3374 void
sendDropTrig(Signal * signal,BackupRecordPtr ptr)3375 Backup::sendDropTrig(Signal* signal, BackupRecordPtr ptr)
3376 {
3377 TablePtr tabPtr;
3378 ptr.p->slaveData.gsn = GSN_DROP_TRIG_IMPL_REQ;
3379
3380 if (ptr.p->slaveData.dropTrig.tableId == RNIL) {
3381 jam();
3382 if(ptr.p->tables.count())
3383 ptr.p->tables.first(tabPtr);
3384 else
3385 {
3386 // Early abort, go to close files
3387 jam();
3388 closeFiles(signal, ptr);
3389 return;
3390 }
3391 } else {
3392 jam();
3393 ndbrequire(findTable(ptr, tabPtr, ptr.p->slaveData.dropTrig.tableId));
3394 ptr.p->tables.next(tabPtr);
3395 }//if
3396 if (tabPtr.i != RNIL) {
3397 jam();
3398 sendDropTrig(signal, ptr, tabPtr);
3399 } else {
3400 /**
3401 * Insert footers
3402 */
3403 //if backup error, we needn't insert footers
3404 if(ptr.p->checkError())
3405 {
3406 jam();
3407 closeFiles(signal, ptr);
3408 ptr.p->errorCode = 0;
3409 return;
3410 }
3411
3412 {
3413 BackupFilePtr filePtr;
3414 ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
3415 Uint32 * dst;
3416 ndbrequire(filePtr.p->operation.dataBuffer.getWritePtr(&dst, 1));
3417 * dst = 0;
3418 filePtr.p->operation.dataBuffer.updateWritePtr(1);
3419 }
3420
3421 {
3422 BackupFilePtr filePtr;
3423 ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
3424
3425 const Uint32 gcpSz = sizeof(BackupFormat::CtlFile::GCPEntry) >> 2;
3426
3427 Uint32 * dst;
3428 ndbrequire(filePtr.p->operation.dataBuffer.getWritePtr(&dst, gcpSz));
3429
3430 BackupFormat::CtlFile::GCPEntry * gcp =
3431 (BackupFormat::CtlFile::GCPEntry*)dst;
3432
3433 gcp->SectionType = htonl(BackupFormat::GCP_ENTRY);
3434 gcp->SectionLength = htonl(gcpSz);
3435 gcp->StartGCP = htonl(ptr.p->startGCP);
3436 gcp->StopGCP = htonl(ptr.p->stopGCP - 1);
3437 filePtr.p->operation.dataBuffer.updateWritePtr(gcpSz);
3438
3439 {
3440 TablePtr tabPtr;
3441 if (ptr.p->tables.first(tabPtr))
3442 {
3443 jam();
3444 signal->theData[0] = BackupContinueB::BACKUP_FRAGMENT_INFO;
3445 signal->theData[1] = ptr.i;
3446 signal->theData[2] = tabPtr.i;
3447 signal->theData[3] = 0;
3448 sendSignal(reference(), GSN_CONTINUEB, signal, 4, JBB);
3449 }
3450 else
3451 {
3452 jam();
3453 closeFiles(signal, ptr);
3454 }
3455 }
3456 }
3457 }
3458 }
3459
3460 void
sendDropTrig(Signal * signal,BackupRecordPtr ptr,TablePtr tabPtr)3461 Backup::sendDropTrig(Signal* signal, BackupRecordPtr ptr, TablePtr tabPtr)
3462 {
3463 jam();
3464 DropTrigImplReq* req = (DropTrigImplReq*)signal->getDataPtrSend();
3465
3466 ptr.p->slaveData.gsn = GSN_DROP_TRIG_IMPL_REQ;
3467 ptr.p->slaveData.trigSendCounter = 0;
3468 req->senderRef = reference(); // Sending to myself
3469 req->senderData = ptr.i;
3470 req->requestType = 0;
3471 req->tableId = tabPtr.p->tableId;
3472 req->tableVersion = 0;
3473 req->indexId = RNIL;
3474 req->indexVersion = 0;
3475 req->receiverRef = reference();
3476
3477 // TUP needs some triggerInfo to find right list
3478 Uint32 ti = 0;
3479 if(ptr.p->flags & BackupReq::USE_UNDO_LOG)
3480 TriggerInfo::setTriggerType(ti, TriggerType::SUBSCRIPTION_BEFORE);
3481 else
3482 TriggerInfo::setTriggerType(ti, TriggerType::SUBSCRIPTION);
3483 TriggerInfo::setTriggerActionTime(ti, TriggerActionTime::TA_DETACHED);
3484 TriggerInfo::setMonitorReplicas(ti, true);
3485 TriggerInfo::setMonitorAllAttributes(ti, false);
3486
3487 ptr.p->slaveData.dropTrig.tableId = tabPtr.p->tableId;
3488 req->tableId = tabPtr.p->tableId;
3489
3490 for (int i = 0; i < 3; i++) {
3491 Uint32 id = tabPtr.p->triggerIds[i];
3492 req->triggerId = id;
3493
3494 Uint32 ti2 = ti;
3495 TriggerInfo::setTriggerEvent(ti2, triggerEventValues[i]);
3496 req->triggerInfo = ti2;
3497
3498 sendSignal(DBTUP_REF, GSN_DROP_TRIG_IMPL_REQ,
3499 signal, DropTrigImplReq::SignalLength, JBB);
3500 ptr.p->slaveData.trigSendCounter ++;
3501 }
3502 }
3503
3504 void
execDROP_TRIG_IMPL_REF(Signal * signal)3505 Backup::execDROP_TRIG_IMPL_REF(Signal* signal)
3506 {
3507 jamEntry();
3508
3509 const DropTrigImplRef* ref = (const DropTrigImplRef*)signal->getDataPtr();
3510 const Uint32 ptrI = ref->senderData;
3511
3512 BackupRecordPtr ptr;
3513 c_backupPool.getPtr(ptr, ptrI);
3514
3515 if(ref->triggerId != ~(Uint32) 0)
3516 {
3517 ndbout << "ERROR DROPPING TRIGGER: " << ref->triggerId;
3518 ndbout << " Err: " << ref->errorCode << endl << endl;
3519 }
3520
3521 dropTrigReply(signal, ptr);
3522 }
3523
3524 void
execDROP_TRIG_IMPL_CONF(Signal * signal)3525 Backup::execDROP_TRIG_IMPL_CONF(Signal* signal)
3526 {
3527 jamEntry();
3528
3529 const DropTrigImplConf* conf = (const DropTrigImplConf*)signal->getDataPtr();
3530 const Uint32 ptrI = conf->senderData;
3531
3532 BackupRecordPtr ptr;
3533 c_backupPool.getPtr(ptr, ptrI);
3534
3535 dropTrigReply(signal, ptr);
3536 }
3537
3538 void
dropTrigReply(Signal * signal,BackupRecordPtr ptr)3539 Backup::dropTrigReply(Signal* signal, BackupRecordPtr ptr)
3540 {
3541 CRASH_INSERTION((10012));
3542
3543 ndbrequire(ptr.p->slaveData.gsn == GSN_DROP_TRIG_IMPL_REQ);
3544 ndbrequire(ptr.p->slaveData.trigSendCounter.done() == false);
3545
3546 // move from .masterData to .slaveData
3547 ptr.p->slaveData.trigSendCounter--;
3548 if(ptr.p->slaveData.trigSendCounter.done() == false){
3549 jam();
3550 return;
3551 }//if
3552
3553 sendDropTrig(signal, ptr); // recursive next
3554 }
3555
/*****************************************************************************
 *
 * Master functionality - Stop backup
 *
 *****************************************************************************/
3561 void
execSTOP_BACKUP_REF(Signal * signal)3562 Backup::execSTOP_BACKUP_REF(Signal* signal)
3563 {
3564 jamEntry();
3565
3566 StopBackupRef* ref = (StopBackupRef*)signal->getDataPtr();
3567 const Uint32 ptrI = ref->backupPtr;
3568 //const Uint32 backupId = ref->backupId;
3569 const Uint32 nodeId = ref->nodeId;
3570
3571 BackupRecordPtr ptr;
3572 c_backupPool.getPtr(ptr, ptrI);
3573
3574 ptr.p->setErrorCode(ref->errorCode);
3575 stopBackupReply(signal, ptr, nodeId);
3576 }
3577
3578 void
sendStopBackup(Signal * signal,BackupRecordPtr ptr)3579 Backup::sendStopBackup(Signal* signal, BackupRecordPtr ptr)
3580 {
3581 jam();
3582
3583 StopBackupReq* stop = (StopBackupReq*)signal->getDataPtrSend();
3584 stop->backupPtr = ptr.i;
3585 stop->backupId = ptr.p->backupId;
3586 stop->startGCP = ptr.p->startGCP;
3587 stop->stopGCP = ptr.p->stopGCP;
3588
3589 ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ;
3590 ptr.p->masterData.sendCounter = ptr.p->nodes;
3591 BlockNumber backupBlockNo = numberToBlock(BACKUP, instanceKey(ptr));
3592 NodeReceiverGroup rg(backupBlockNo, ptr.p->nodes);
3593 sendSignal(rg, GSN_STOP_BACKUP_REQ, signal,
3594 StopBackupReq::SignalLength, JBB);
3595 }
3596
3597 void
execSTOP_BACKUP_CONF(Signal * signal)3598 Backup::execSTOP_BACKUP_CONF(Signal* signal)
3599 {
3600 jamEntry();
3601
3602 StopBackupConf* conf = (StopBackupConf*)signal->getDataPtr();
3603 const Uint32 ptrI = conf->backupPtr;
3604 //const Uint32 backupId = conf->backupId;
3605 const Uint32 nodeId = refToNode(signal->senderBlockRef());
3606
3607 BackupRecordPtr ptr;
3608 c_backupPool.getPtr(ptr, ptrI);
3609
3610 ptr.p->noOfLogBytes += conf->noOfLogBytes;
3611 ptr.p->noOfLogRecords += conf->noOfLogRecords;
3612
3613 stopBackupReply(signal, ptr, nodeId);
3614 }
3615
3616 void
stopBackupReply(Signal * signal,BackupRecordPtr ptr,Uint32 nodeId)3617 Backup::stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId)
3618 {
3619 CRASH_INSERTION((10013));
3620
3621 if (!haveAllSignals(ptr, GSN_STOP_BACKUP_REQ, nodeId)) {
3622 jam();
3623 return;
3624 }
3625
3626 sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupComplete);
3627
3628 if(!ptr.p->checkError() && ptr.p->masterData.errorCode == 0)
3629 {
3630 if (SEND_BACKUP_COMPLETED_FLAG(ptr.p->flags))
3631 {
3632 BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend();
3633 rep->backupId = ptr.p->backupId;
3634 rep->senderData = ptr.p->clientData;
3635 rep->startGCP = ptr.p->startGCP;
3636 rep->stopGCP = ptr.p->stopGCP;
3637 rep->noOfBytesLow = (Uint32)(ptr.p->noOfBytes & 0xFFFFFFFF);
3638 rep->noOfRecordsLow = (Uint32)(ptr.p->noOfRecords & 0xFFFFFFFF);
3639 rep->noOfBytesHigh = (Uint32)(ptr.p->noOfBytes >> 32);
3640 rep->noOfRecordsHigh = (Uint32)(ptr.p->noOfRecords >> 32);
3641 rep->noOfLogBytes = Uint32(ptr.p->noOfLogBytes); // TODO 64-bit log-bytes
3642 rep->noOfLogRecords = Uint32(ptr.p->noOfLogRecords); // TODO ^^
3643 rep->nodes = ptr.p->nodes;
3644 sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal,
3645 BackupCompleteRep::SignalLength, JBB);
3646 }
3647
3648 signal->theData[0] = NDB_LE_BackupCompleted;
3649 signal->theData[1] = ptr.p->clientRef;
3650 signal->theData[2] = ptr.p->backupId;
3651 signal->theData[3] = ptr.p->startGCP;
3652 signal->theData[4] = ptr.p->stopGCP;
3653 signal->theData[5] = (Uint32)(ptr.p->noOfBytes & 0xFFFFFFFF);
3654 signal->theData[6] = (Uint32)(ptr.p->noOfRecords & 0xFFFFFFFF);
3655 signal->theData[7] = (Uint32)(ptr.p->noOfLogBytes & 0xFFFFFFFF);
3656 signal->theData[8] = (Uint32)(ptr.p->noOfLogRecords & 0xFFFFFFFF);
3657 ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9);
3658 signal->theData[9+NdbNodeBitmask::Size] = (Uint32)(ptr.p->noOfBytes >> 32);
3659 signal->theData[10+NdbNodeBitmask::Size] = (Uint32)(ptr.p->noOfRecords >> 32);
3660 signal->theData[11+NdbNodeBitmask::Size] = (Uint32)(ptr.p->noOfLogBytes >> 32);
3661 signal->theData[12+NdbNodeBitmask::Size] = (Uint32)(ptr.p->noOfLogRecords >> 32);
3662 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 13+NdbNodeBitmask::Size, JBB);
3663 }
3664 else
3665 {
3666 masterAbort(signal, ptr);
3667 }
3668 }
3669
3670 void
initReportStatus(Signal * signal,BackupRecordPtr ptr)3671 Backup::initReportStatus(Signal *signal, BackupRecordPtr ptr)
3672 {
3673 ptr.p->m_prev_report = NdbTick_getCurrentTicks();
3674 }
3675
3676 void
checkReportStatus(Signal * signal,BackupRecordPtr ptr)3677 Backup::checkReportStatus(Signal *signal, BackupRecordPtr ptr)
3678 {
3679 if (m_backup_report_frequency == 0)
3680 return;
3681
3682 const NDB_TICKS now = NdbTick_getCurrentTicks();
3683 const Uint64 elapsed = NdbTick_Elapsed(ptr.p->m_prev_report, now).seconds();
3684 if (elapsed > m_backup_report_frequency)
3685 {
3686 reportStatus(signal, ptr);
3687 ptr.p->m_prev_report = now;
3688 }
3689 }
3690
3691 void
reportStatus(Signal * signal,BackupRecordPtr ptr,BlockReference ref)3692 Backup::reportStatus(Signal* signal, BackupRecordPtr ptr,
3693 BlockReference ref)
3694 {
3695 const int signal_length = 11;
3696
3697 signal->theData[0] = NDB_LE_BackupStatus;
3698 for (int i= 1; i < signal_length; i++)
3699 signal->theData[i] = 0;
3700
3701 if (ptr.i == RNIL ||
3702 (ptr.p->m_gsn == 0 &&
3703 ptr.p->masterData.gsn == 0))
3704 {
3705 sendSignal(ref, GSN_EVENT_REP, signal, signal_length, JBB);
3706 return;
3707 }
3708 signal->theData[1] = ptr.p->clientRef;
3709 signal->theData[2] = ptr.p->backupId;
3710
3711 if (ptr.p->dataFilePtr == RNIL)
3712 {
3713 sendSignal(ref, GSN_EVENT_REP, signal, signal_length, JBB);
3714 return;
3715 }
3716
3717 BackupFilePtr dataFilePtr;
3718 ptr.p->files.getPtr(dataFilePtr, ptr.p->dataFilePtr);
3719 signal->theData[3] = (Uint32)(dataFilePtr.p->operation.m_bytes_total & 0xFFFFFFFF);
3720 signal->theData[4] = (Uint32)(dataFilePtr.p->operation.m_bytes_total >> 32);
3721 signal->theData[5] = (Uint32)(dataFilePtr.p->operation.m_records_total & 0xFFFFFFFF);
3722 signal->theData[6] = (Uint32)(dataFilePtr.p->operation.m_records_total >> 32);
3723
3724 if (ptr.p->logFilePtr == RNIL)
3725 {
3726 sendSignal(ref, GSN_EVENT_REP, signal, signal_length, JBB);
3727 return;
3728 }
3729
3730 BackupFilePtr logFilePtr;
3731 ptr.p->files.getPtr(logFilePtr, ptr.p->logFilePtr);
3732 signal->theData[7] = (Uint32)(logFilePtr.p->operation.m_bytes_total & 0xFFFFFFFF);
3733 signal->theData[8] = (Uint32)(logFilePtr.p->operation.m_bytes_total >> 32);
3734 signal->theData[9] = (Uint32)(logFilePtr.p->operation.m_records_total & 0xFFFFFFFF);
3735 signal->theData[10]= (Uint32)(logFilePtr.p->operation.m_records_total >> 32);
3736
3737 sendSignal(ref, GSN_EVENT_REP, signal, signal_length, JBB);
3738 }
3739
/*****************************************************************************
 *
 * Master functionality - Abort backup
 *
 *****************************************************************************/
3745 void
masterAbort(Signal * signal,BackupRecordPtr ptr)3746 Backup::masterAbort(Signal* signal, BackupRecordPtr ptr)
3747 {
3748 jam();
3749 #ifdef DEBUG_ABORT
3750 ndbout_c("************ masterAbort");
3751 #endif
3752
3753 ndbassert(ptr.p->masterRef == reference());
3754
3755 if(ptr.p->masterData.errorCode != 0)
3756 {
3757 jam();
3758 return;
3759 }
3760
3761 if (SEND_BACKUP_STARTED_FLAG(ptr.p->flags))
3762 {
3763 BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend();
3764 rep->backupId = ptr.p->backupId;
3765 rep->senderData = ptr.p->clientData;
3766 rep->reason = ptr.p->errorCode;
3767 sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal,
3768 BackupAbortRep::SignalLength, JBB);
3769 }
3770 signal->theData[0] = NDB_LE_BackupAborted;
3771 signal->theData[1] = ptr.p->clientRef;
3772 signal->theData[2] = ptr.p->backupId;
3773 signal->theData[3] = ptr.p->errorCode;
3774 sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
3775
3776 ndbrequire(ptr.p->errorCode);
3777 ptr.p->masterData.errorCode = ptr.p->errorCode;
3778
3779 AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
3780 ord->backupId = ptr.p->backupId;
3781 ord->backupPtr = ptr.i;
3782 ord->senderData= ptr.i;
3783 BlockNumber backupBlockNo = numberToBlock(BACKUP, instanceKey(ptr));
3784 NodeReceiverGroup rg(backupBlockNo, ptr.p->nodes);
3785
3786 switch(ptr.p->masterData.gsn){
3787 case GSN_DEFINE_BACKUP_REQ:
3788 ord->requestType = AbortBackupOrd::BackupFailure;
3789 sendSignal(rg, GSN_ABORT_BACKUP_ORD, signal,
3790 AbortBackupOrd::SignalLength, JBB);
3791 return;
3792 case GSN_CREATE_TRIG_IMPL_REQ:
3793 case GSN_START_BACKUP_REQ:
3794 case GSN_ALTER_TRIG_REQ:
3795 case GSN_WAIT_GCP_REQ:
3796 case GSN_BACKUP_FRAGMENT_REQ:
3797 jam();
3798 ptr.p->stopGCP= ptr.p->startGCP + 1;
3799 sendStopBackup(signal, ptr); // dropping due to error
3800 return;
3801 case GSN_UTIL_SEQUENCE_REQ:
3802 case GSN_UTIL_LOCK_REQ:
3803 ndbrequire(false);
3804 return;
3805 case GSN_DROP_TRIG_IMPL_REQ:
3806 case GSN_STOP_BACKUP_REQ:
3807 return;
3808 }
3809 }
3810
3811 void
abort_scan(Signal * signal,BackupRecordPtr ptr)3812 Backup::abort_scan(Signal * signal, BackupRecordPtr ptr)
3813 {
3814 AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
3815 ord->backupId = ptr.p->backupId;
3816 ord->backupPtr = ptr.i;
3817 ord->senderData= ptr.i;
3818 ord->requestType = AbortBackupOrd::AbortScan;
3819
3820 TablePtr tabPtr;
3821 ptr.p->tables.first(tabPtr);
3822 for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) {
3823 jam();
3824 FragmentPtr fragPtr;
3825 Array<Fragment> & frags = tabPtr.p->fragments;
3826 const Uint32 fragCount = frags.getSize();
3827
3828 for(Uint32 i = 0; i<fragCount; i++) {
3829 jam();
3830 tabPtr.p->fragments.getPtr(fragPtr, i);
3831 const Uint32 nodeId = fragPtr.p->node;
3832 if(fragPtr.p->scanning != 0 && ptr.p->nodes.get(nodeId)) {
3833 jam();
3834
3835 BlockReference ref = numberToRef(BACKUP, instanceKey(ptr), nodeId);
3836 sendSignal(ref, GSN_ABORT_BACKUP_ORD, signal,
3837 AbortBackupOrd::SignalLength, JBB);
3838
3839 }
3840 }
3841 }
3842 }
3843
/*****************************************************************************
 *
 * Slave functionality: Define Backup
 *
 *****************************************************************************/
3849 void
defineBackupRef(Signal * signal,BackupRecordPtr ptr,Uint32 errCode)3850 Backup::defineBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errCode)
3851 {
3852 jam();
3853 ptr.p->setErrorCode(errCode);
3854 if(ptr.p->is_lcp())
3855 {
3856 jam();
3857 if (ptr.p->ctlFilePtr == RNIL) {
3858 ptr.p->m_gsn = GSN_DEFINE_BACKUP_REF;
3859 ndbrequire(ptr.p->errorCode != 0);
3860 DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtrSend();
3861 ref->backupId = ptr.p->backupId;
3862 ref->backupPtr = ptr.i;
3863 ref->errorCode = ptr.p->errorCode;
3864 ref->nodeId = getOwnNodeId();
3865 sendSignal(ptr.p->masterRef, GSN_DEFINE_BACKUP_REF, signal,
3866 DefineBackupRef::SignalLength, JBB);
3867 return;
3868 }
3869
3870 BackupFilePtr filePtr;
3871 ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
3872 if (filePtr.p->m_flags & BackupFile::BF_LCP_META)
3873 {
3874 jam();
3875 ndbrequire(! (filePtr.p->m_flags & BackupFile::BF_FILE_THREAD));
3876 filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_LCP_META;
3877 if (filePtr.p->m_flags & BackupFile::BF_OPEN)
3878 {
3879 closeFile(signal, ptr, filePtr);
3880 return;
3881 }
3882 }
3883
3884 ndbrequire(filePtr.p->m_flags == 0);
3885
3886 TablePtr tabPtr;
3887 FragmentPtr fragPtr;
3888
3889 ndbrequire(ptr.p->tables.first(tabPtr));
3890 tabPtr.p->fragments.getPtr(fragPtr, 0);
3891
3892 LcpPrepareRef* ref= (LcpPrepareRef*)signal->getDataPtrSend();
3893 ref->senderData = ptr.p->clientData;
3894 ref->senderRef = reference();
3895 ref->tableId = tabPtr.p->tableId;
3896 ref->fragmentId = fragPtr.p->fragmentId;
3897 ref->errorCode = errCode;
3898 sendSignal(ptr.p->masterRef, GSN_LCP_PREPARE_REF,
3899 signal, LcpPrepareRef::SignalLength, JBA);
3900 return;
3901 }
3902
3903 ptr.p->m_gsn = GSN_DEFINE_BACKUP_REF;
3904 ndbrequire(ptr.p->errorCode != 0);
3905
3906 DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtrSend();
3907 ref->backupId = ptr.p->backupId;
3908 ref->backupPtr = ptr.i;
3909 ref->errorCode = ptr.p->errorCode;
3910 ref->nodeId = getOwnNodeId();
3911 sendSignal(ptr.p->masterRef, GSN_DEFINE_BACKUP_REF, signal,
3912 DefineBackupRef::SignalLength, JBB);
3913 }
3914
3915 void
execDEFINE_BACKUP_REQ(Signal * signal)3916 Backup::execDEFINE_BACKUP_REQ(Signal* signal)
3917 {
3918 jamEntry();
3919
3920 DefineBackupReq* req = (DefineBackupReq*)signal->getDataPtr();
3921
3922 BackupRecordPtr ptr;
3923 const Uint32 ptrI = req->backupPtr;
3924 const Uint32 backupId = req->backupId;
3925 const BlockReference senderRef = req->senderRef;
3926
3927 if(senderRef == reference()){
3928 /**
3929 * Signal sent from myself -> record already seized
3930 */
3931 jam();
3932 c_backupPool.getPtr(ptr, ptrI);
3933 } else { // from other node
3934 jam();
3935 #ifdef DEBUG_ABORT
3936 dumpUsedResources();
3937 #endif
3938 if (!c_backups.getPool().seizeId(ptr, ptrI)) {
3939 jam();
3940 ndbrequire(false); // If master has succeeded slave should succed
3941 }//if
3942 c_backups.addFirst(ptr);
3943 }//if
3944
3945 CRASH_INSERTION((10014));
3946
3947 ptr.p->m_gsn = GSN_DEFINE_BACKUP_REQ;
3948 ptr.p->slaveState.forceState(INITIAL);
3949 ptr.p->slaveState.setState(DEFINING);
3950 ptr.p->slaveData.dropTrig.tableId = RNIL;
3951 ptr.p->errorCode = 0;
3952 ptr.p->clientRef = req->clientRef;
3953 ptr.p->clientData = req->clientData;
3954 if(senderRef == reference())
3955 ptr.p->flags = req->flags;
3956 else
3957 ptr.p->flags = req->flags & ~((Uint32)BackupReq::WAITCOMPLETED); /* remove waitCompleted flags
3958 * as non master should never
3959 * reply
3960 */
3961 ptr.p->masterRef = senderRef;
3962 ptr.p->nodes = req->nodes;
3963 ptr.p->backupId = backupId;
3964 ptr.p->backupKey[0] = req->backupKey[0];
3965 ptr.p->backupKey[1] = req->backupKey[1];
3966 ptr.p->backupDataLen = req->backupDataLen;
3967 ptr.p->masterData.errorCode = 0;
3968 ptr.p->noOfBytes = 0;
3969 ptr.p->noOfRecords = 0;
3970 ptr.p->noOfLogBytes = 0;
3971 ptr.p->noOfLogRecords = 0;
3972 ptr.p->currGCP = 0;
3973 ptr.p->startGCP = 0;
3974 ptr.p->stopGCP = 0;
3975 ptr.p->m_prioA_scan_batches_to_execute = 0;
3976 ptr.p->m_lastSignalId = 0;
3977
3978 /**
3979 * Allocate files
3980 */
3981 BackupFilePtr files[3];
3982 Uint32 noOfPages[] = {
3983 NO_OF_PAGES_META_FILE,
3984 2, // 32k
3985 0 // 3M
3986 };
3987 const Uint32 maxInsert[] = {
3988 MAX_WORDS_META_FILE,
3989 4096, // 16k
3990 // Max 16 tuples
3991 ZRESERVED_SCAN_BATCH_SIZE *
3992 (MAX_TUPLE_SIZE_IN_WORDS + MAX_ATTRIBUTES_IN_TABLE + 128/* safety */),
3993 };
3994 Uint32 minWrite[] = {
3995 8192,
3996 8192,
3997 32768
3998 };
3999 Uint32 maxWrite[] = {
4000 8192,
4001 8192,
4002 32768
4003 };
4004
4005 minWrite[1] = c_defaults.m_minWriteSize;
4006 maxWrite[1] = c_defaults.m_maxWriteSize;
4007 noOfPages[1] = (c_defaults.m_logBufferSize + sizeof(Page32) - 1) /
4008 sizeof(Page32);
4009 minWrite[2] = c_defaults.m_minWriteSize;
4010 maxWrite[2] = c_defaults.m_maxWriteSize;
4011 noOfPages[2] = (c_defaults.m_dataBufferSize + sizeof(Page32) - 1) /
4012 sizeof(Page32);
4013
4014 if (ptr.p->is_lcp())
4015 {
4016 noOfPages[2] = (c_defaults.m_lcp_buffer_size + sizeof(Page32) - 1) /
4017 sizeof(Page32);
4018 }
4019
4020 ptr.p->ctlFilePtr = ptr.p->logFilePtr = ptr.p->dataFilePtr = RNIL;
4021
4022 for(Uint32 i = 0; i<3; i++) {
4023 jam();
4024 if(ptr.p->is_lcp() && i != 2)
4025 {
4026 files[i].i = RNIL;
4027 continue;
4028 }
4029 if (!ptr.p->files.seizeFirst(files[i])) {
4030 jam();
4031 defineBackupRef(signal, ptr,
4032 DefineBackupRef::FailedToAllocateFileRecord);
4033 return;
4034 }//if
4035
4036 files[i].p->tableId = RNIL;
4037 files[i].p->backupPtr = ptr.i;
4038 files[i].p->filePointer = RNIL;
4039 files[i].p->m_flags = 0;
4040 files[i].p->errorCode = 0;
4041 files[i].p->m_sent_words_in_scan_batch = 0;
4042 files[i].p->m_num_scan_req_on_prioa = 0;
4043
4044 if(ERROR_INSERTED(10035) || files[i].p->pages.seize(noOfPages[i]) == false)
4045 {
4046 jam();
4047 DEBUG_OUT("Failed to seize " << noOfPages[i] << " pages");
4048 defineBackupRef(signal, ptr, DefineBackupRef::FailedToAllocateBuffers);
4049 return;
4050 }//if
4051 Page32Ptr pagePtr;
4052 files[i].p->pages.getPtr(pagePtr, 0);
4053
4054 const char * msg = files[i].p->
4055 operation.dataBuffer.setup((Uint32*)pagePtr.p,
4056 noOfPages[i] * (sizeof(Page32) >> 2),
4057 128,
4058 minWrite[i] >> 2,
4059 maxWrite[i] >> 2,
4060 maxInsert[i]);
4061 if(msg != 0) {
4062 jam();
4063 defineBackupRef(signal, ptr, DefineBackupRef::FailedToSetupFsBuffers);
4064 return;
4065 }//if
4066
4067 switch(i){
4068 case 0:
4069 files[i].p->fileType = BackupFormat::CTL_FILE;
4070 ptr.p->ctlFilePtr = files[i].i;
4071 break;
4072 case 1:
4073 if(ptr.p->flags & BackupReq::USE_UNDO_LOG)
4074 files[i].p->fileType = BackupFormat::UNDO_FILE;
4075 else
4076 files[i].p->fileType = BackupFormat::LOG_FILE;
4077 ptr.p->logFilePtr = files[i].i;
4078 break;
4079 case 2:
4080 files[i].p->fileType = BackupFormat::DATA_FILE;
4081 ptr.p->dataFilePtr = files[i].i;
4082 }
4083 files[i].p->operation.m_bytes_total = 0;
4084 files[i].p->operation.m_records_total = 0;
4085 }//for
4086
4087 initReportStatus(signal, ptr);
4088
4089 if (!verifyNodesAlive(ptr, ptr.p->nodes)) {
4090 jam();
4091 defineBackupRef(signal, ptr, DefineBackupRef::Undefined);
4092 return;
4093 }//if
4094 if (ERROR_INSERTED(10027)) {
4095 jam();
4096 defineBackupRef(signal, ptr, 327);
4097 return;
4098 }//if
4099
4100 if(ptr.p->backupDataLen == 0) {
4101 jam();
4102 backupAllData(signal, ptr);
4103 return;
4104 }//if
4105
4106 if(ptr.p->is_lcp())
4107 {
4108 jam();
4109 getFragmentInfoDone(signal, ptr);
4110 return;
4111 }
4112
4113 /**
4114 * Not implemented
4115 */
4116 ndbrequire(0);
4117 }
4118
4119 void
backupAllData(Signal * signal,BackupRecordPtr ptr)4120 Backup::backupAllData(Signal* signal, BackupRecordPtr ptr)
4121 {
4122 /**
4123 * Get all tables from dict
4124 */
4125 ListTablesReq * req = (ListTablesReq*)signal->getDataPtrSend();
4126 req->init();
4127 req->senderRef = reference();
4128 req->senderData = ptr.i;
4129 req->setTableId(0);
4130 req->setTableType(0);
4131 sendSignal(DBDICT_REF, GSN_LIST_TABLES_REQ, signal,
4132 ListTablesReq::SignalLength, JBB);
4133 }
4134
4135 void
execLIST_TABLES_CONF(Signal * signal)4136 Backup::execLIST_TABLES_CONF(Signal* signal)
4137 {
4138 jamEntry();
4139 Uint32 fragInfo = signal->header.m_fragmentInfo;
4140 ListTablesConf* conf = (ListTablesConf*)signal->getDataPtr();
4141 Uint32 noOfTables = conf->noOfTables;
4142
4143 BackupRecordPtr ptr;
4144 c_backupPool.getPtr(ptr, conf->senderData);
4145
4146 SectionHandle handle (this, signal);
4147 signal->header.m_fragmentInfo = 0;
4148 if (noOfTables > 0)
4149 {
4150 ListTablesData ltd;
4151 const Uint32 listTablesDataSizeInWords = (sizeof(ListTablesData) + 3) / 4;
4152 SegmentedSectionPtr tableDataPtr;
4153 handle.getSection(tableDataPtr, ListTablesConf::TABLE_DATA);
4154 SimplePropertiesSectionReader
4155 tableDataReader(tableDataPtr, getSectionSegmentPool());
4156
4157 tableDataReader.reset();
4158 for(unsigned int i = 0; i<noOfTables; i++) {
4159 jam();
4160 tableDataReader.getWords((Uint32 *)<d, listTablesDataSizeInWords);
4161 Uint32 tableId = ltd.getTableId();
4162 Uint32 tableType = ltd.getTableType();
4163 Uint32 state= ltd.getTableState();
4164
4165 if (! (DictTabInfo::isTable(tableType) ||
4166 DictTabInfo::isIndex(tableType) ||
4167 DictTabInfo::isFilegroup(tableType) ||
4168 DictTabInfo::isFile(tableType)
4169 || DictTabInfo::isHashMap(tableType)
4170 || DictTabInfo::isForeignKey(tableType)
4171 ))
4172 {
4173 jam();
4174 continue;
4175 }
4176
4177 if (state != DictTabInfo::StateOnline)
4178 {
4179 jam();
4180 continue;
4181 }
4182
4183 TablePtr tabPtr;
4184 ptr.p->tables.seizeLast(tabPtr);
4185 if(tabPtr.i == RNIL) {
4186 jam();
4187 defineBackupRef(signal, ptr, DefineBackupRef::FailedToAllocateTables);
4188 releaseSections(handle);
4189 return;
4190 }//if
4191 tabPtr.p->tableId = tableId;
4192 tabPtr.p->tableType = tableType;
4193 }//for
4194 }
4195
4196 releaseSections(handle);
4197
4198 /*
4199 If first or not last signal
4200 then keep accumulating table data
4201 */
4202 if ((fragInfo == 1) || (fragInfo == 2))
4203 {
4204 return;
4205 }
4206 openFiles(signal, ptr);
4207 }
4208
4209 void
openFiles(Signal * signal,BackupRecordPtr ptr)4210 Backup::openFiles(Signal* signal, BackupRecordPtr ptr)
4211 {
4212 jam();
4213
4214 BackupFilePtr filePtr;
4215
4216 FsOpenReq * req = (FsOpenReq *)signal->getDataPtrSend();
4217 req->userReference = reference();
4218 req->fileFlags =
4219 FsOpenReq::OM_WRITEONLY |
4220 FsOpenReq::OM_CREATE_IF_NONE |
4221 FsOpenReq::OM_APPEND |
4222 FsOpenReq::OM_AUTOSYNC;
4223
4224 if (c_defaults.m_compressed_backup)
4225 req->fileFlags |= FsOpenReq::OM_GZ;
4226
4227 FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF);
4228 req->auto_sync_size = c_defaults.m_disk_synch_size;
4229 /**
4230 * Ctl file
4231 */
4232 c_backupFilePool.getPtr(filePtr, ptr.p->ctlFilePtr);
4233 filePtr.p->m_flags |= BackupFile::BF_OPENING;
4234
4235 req->userPointer = filePtr.i;
4236 FsOpenReq::setVersion(req->fileNumber, 2);
4237 FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL);
4238 FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId);
4239 FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId());
4240 sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
4241
4242 /**
4243 * Log file
4244 */
4245 c_backupFilePool.getPtr(filePtr, ptr.p->logFilePtr);
4246 filePtr.p->m_flags |= BackupFile::BF_OPENING;
4247
4248 //write uncompressed log file when enable undo log,since log file is read from back to front.
4249 if(ptr.p->flags & BackupReq::USE_UNDO_LOG)
4250 req->fileFlags &= ~FsOpenReq::OM_GZ;
4251
4252 req->userPointer = filePtr.i;
4253 FsOpenReq::setVersion(req->fileNumber, 2);
4254 FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_LOG);
4255 FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId);
4256 FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId());
4257 sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
4258
4259 /**
4260 * Data file
4261 */
4262 c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
4263 filePtr.p->m_flags |= BackupFile::BF_OPENING;
4264
4265 if (c_defaults.m_o_direct)
4266 req->fileFlags |= FsOpenReq::OM_DIRECT;
4267 if (c_defaults.m_compressed_backup)
4268 req->fileFlags |= FsOpenReq::OM_GZ;
4269 req->userPointer = filePtr.i;
4270 FsOpenReq::setVersion(req->fileNumber, 2);
4271 FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA);
4272 FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId);
4273 FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId());
4274 FsOpenReq::v2_setCount(req->fileNumber, 0);
4275 sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
4276 }
4277
4278 void
execFSOPENREF(Signal * signal)4279 Backup::execFSOPENREF(Signal* signal)
4280 {
4281 jamEntry();
4282
4283 FsRef * ref = (FsRef *)signal->getDataPtr();
4284
4285 const Uint32 userPtr = ref->userPointer;
4286
4287 BackupFilePtr filePtr;
4288 c_backupFilePool.getPtr(filePtr, userPtr);
4289
4290 BackupRecordPtr ptr;
4291 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
4292 ptr.p->setErrorCode(ref->errorCode);
4293 openFilesReply(signal, ptr, filePtr);
4294 }
4295
4296 void
execFSOPENCONF(Signal * signal)4297 Backup::execFSOPENCONF(Signal* signal)
4298 {
4299 jamEntry();
4300
4301 FsConf * conf = (FsConf *)signal->getDataPtr();
4302
4303 const Uint32 userPtr = conf->userPointer;
4304 const Uint32 filePointer = conf->filePointer;
4305
4306 BackupFilePtr filePtr;
4307 c_backupFilePool.getPtr(filePtr, userPtr);
4308 filePtr.p->filePointer = filePointer;
4309
4310 BackupRecordPtr ptr;
4311 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
4312
4313 ndbrequire(! (filePtr.p->m_flags & BackupFile::BF_OPEN));
4314 filePtr.p->m_flags |= BackupFile::BF_OPEN;
4315 openFilesReply(signal, ptr, filePtr);
4316 }
4317
4318 void
openFilesReply(Signal * signal,BackupRecordPtr ptr,BackupFilePtr filePtr)4319 Backup::openFilesReply(Signal* signal,
4320 BackupRecordPtr ptr, BackupFilePtr filePtr)
4321 {
4322 jam();
4323
4324 /**
4325 * Mark files as "opened"
4326 */
4327 ndbrequire(filePtr.p->m_flags & BackupFile::BF_OPENING);
4328 filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_OPENING;
4329 filePtr.p->m_flags |= BackupFile::BF_OPEN;
4330 /**
4331 * Check if all files have recived open_reply
4332 */
4333 for(ptr.p->files.first(filePtr); filePtr.i!=RNIL;ptr.p->files.next(filePtr))
4334 {
4335 jam();
4336 if(filePtr.p->m_flags & BackupFile::BF_OPENING) {
4337 jam();
4338 return;
4339 }//if
4340 }//for
4341
4342 if (ERROR_INSERTED(10037)) {
4343 jam();
4344 /**
4345 * Dont return FailedForBackupFilesAleadyExist
4346 * cause this will make NdbBackup auto-retry with higher number :-)
4347 */
4348 ptr.p->errorCode = DefineBackupRef::FailedInsertFileHeader;
4349 defineBackupRef(signal, ptr);
4350 return;
4351 }
4352 /**
4353 * Did open succeed for all files
4354 */
4355 if(ptr.p->checkError())
4356 {
4357 jam();
4358 if(ptr.p->errorCode == FsRef::fsErrFileExists)
4359 {
4360 jam();
4361 ptr.p->errorCode = DefineBackupRef::FailedForBackupFilesAleadyExist;
4362 }
4363 defineBackupRef(signal, ptr);
4364 return;
4365 }//if
4366
4367 if(!ptr.p->is_lcp())
4368 {
4369 /**
4370 * Insert file headers
4371 */
4372 ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
4373 if(!insertFileHeader(BackupFormat::CTL_FILE, ptr.p, filePtr.p)) {
4374 jam();
4375 defineBackupRef(signal, ptr, DefineBackupRef::FailedInsertFileHeader);
4376 return;
4377 }//if
4378
4379 BackupFormat::FileType logfiletype;
4380 if(ptr.p->flags & BackupReq::USE_UNDO_LOG)
4381 logfiletype = BackupFormat::UNDO_FILE;
4382 else
4383 logfiletype = BackupFormat::LOG_FILE;
4384
4385 ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
4386 if(!insertFileHeader(logfiletype, ptr.p, filePtr.p)) {
4387 jam();
4388 defineBackupRef(signal, ptr, DefineBackupRef::FailedInsertFileHeader);
4389 return;
4390 }//if
4391
4392 ptr.p->files.getPtr(filePtr, ptr.p->dataFilePtr);
4393 if(!insertFileHeader(BackupFormat::DATA_FILE, ptr.p, filePtr.p)) {
4394 jam();
4395 defineBackupRef(signal, ptr, DefineBackupRef::FailedInsertFileHeader);
4396 return;
4397 }//if
4398 }
4399 else
4400 {
4401 ptr.p->files.getPtr(filePtr, ptr.p->dataFilePtr);
4402 if(!insertFileHeader(BackupFormat::LCP_FILE, ptr.p, filePtr.p)) {
4403 jam();
4404 defineBackupRef(signal, ptr, DefineBackupRef::FailedInsertFileHeader);
4405 return;
4406 }//if
4407
4408 ptr.p->ctlFilePtr = ptr.p->dataFilePtr;
4409 }
4410
4411 /**
4412 * Start CTL file thread
4413 */
4414 if (!ptr.p->is_lcp())
4415 {
4416 jam();
4417 ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
4418 filePtr.p->m_flags |= BackupFile::BF_FILE_THREAD;
4419
4420 signal->theData[0] = BackupContinueB::START_FILE_THREAD;
4421 signal->theData[1] = filePtr.i;
4422 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
4423 }
4424 else
4425 {
4426 jam();
4427 filePtr.p->m_flags |= BackupFile::BF_LCP_META;
4428 }
4429
4430 /**
4431 * Insert table list in ctl file
4432 */
4433 FsBuffer & buf = filePtr.p->operation.dataBuffer;
4434
4435 const Uint32 sz =
4436 (sizeof(BackupFormat::CtlFile::TableList) >> 2) +
4437 ptr.p->tables.count() - 1;
4438
4439 Uint32 * dst;
4440 ndbrequire(sz < buf.getMaxWrite());
4441 if(!buf.getWritePtr(&dst, sz)) {
4442 jam();
4443 defineBackupRef(signal, ptr, DefineBackupRef::FailedInsertTableList);
4444 return;
4445 }//if
4446
4447 BackupFormat::CtlFile::TableList* tl =
4448 (BackupFormat::CtlFile::TableList*)dst;
4449 tl->SectionType = htonl(BackupFormat::TABLE_LIST);
4450 tl->SectionLength = htonl(sz);
4451
4452 TablePtr tabPtr;
4453 Uint32 count = 0;
4454 for(ptr.p->tables.first(tabPtr);
4455 tabPtr.i != RNIL;
4456 ptr.p->tables.next(tabPtr)){
4457 jam();
4458 tl->TableIds[count] = htonl(tabPtr.p->tableId);
4459 count++;
4460 }//for
4461
4462 buf.updateWritePtr(sz);
4463
4464 /**
4465 * Start getting table definition data
4466 */
4467 ndbrequire(ptr.p->tables.first(tabPtr));
4468
4469 signal->theData[0] = BackupContinueB::BUFFER_FULL_META;
4470 signal->theData[1] = ptr.i;
4471 signal->theData[2] = tabPtr.i;
4472 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
4473 return;
4474 }
4475
4476 bool
insertFileHeader(BackupFormat::FileType ft,BackupRecord * ptrP,BackupFile * filePtrP)4477 Backup::insertFileHeader(BackupFormat::FileType ft,
4478 BackupRecord * ptrP,
4479 BackupFile * filePtrP){
4480 FsBuffer & buf = filePtrP->operation.dataBuffer;
4481
4482 const Uint32 sz = sizeof(BackupFormat::FileHeader) >> 2;
4483
4484 Uint32 * dst;
4485 ndbrequire(sz < buf.getMaxWrite());
4486 if(!buf.getWritePtr(&dst, sz)) {
4487 jam();
4488 return false;
4489 }//if
4490
4491 BackupFormat::FileHeader* header = (BackupFormat::FileHeader*)dst;
4492 ndbrequire(sizeof(header->Magic) == sizeof(BACKUP_MAGIC));
4493 memcpy(header->Magic, BACKUP_MAGIC, sizeof(BACKUP_MAGIC));
4494 header->BackupVersion = htonl(NDB_BACKUP_VERSION);
4495 header->SectionType = htonl(BackupFormat::FILE_HEADER);
4496 header->SectionLength = htonl(sz - 3);
4497 header->FileType = htonl(ft);
4498 header->BackupId = htonl(ptrP->backupId);
4499 header->BackupKey_0 = htonl(ptrP->backupKey[0]);
4500 header->BackupKey_1 = htonl(ptrP->backupKey[1]);
4501 header->ByteOrder = 0x12345678;
4502 header->NdbVersion = htonl(NDB_VERSION_D);
4503 header->MySQLVersion = htonl(NDB_MYSQL_VERSION_D);
4504
4505 buf.updateWritePtr(sz);
4506 return true;
4507 }
4508
4509 void
execGET_TABINFOREF(Signal * signal)4510 Backup::execGET_TABINFOREF(Signal* signal)
4511 {
4512 GetTabInfoRef * ref = (GetTabInfoRef*)signal->getDataPtr();
4513
4514 const Uint32 senderData = ref->senderData;
4515 BackupRecordPtr ptr;
4516 c_backupPool.getPtr(ptr, senderData);
4517
4518 defineBackupRef(signal, ptr, ref->errorCode);
4519 }
4520
4521 void
execGET_TABINFO_CONF(Signal * signal)4522 Backup::execGET_TABINFO_CONF(Signal* signal)
4523 {
4524 jamEntry();
4525
4526 if(!assembleFragments(signal)) {
4527 jam();
4528 return;
4529 }//if
4530
4531 GetTabInfoConf * const conf = (GetTabInfoConf*)signal->getDataPtr();
4532 //const Uint32 senderRef = info->senderRef;
4533 const Uint32 len = conf->totalLen;
4534 const Uint32 senderData = conf->senderData;
4535 const Uint32 tableType = conf->tableType;
4536 const Uint32 tableId = conf->tableId;
4537
4538 BackupRecordPtr ptr;
4539 c_backupPool.getPtr(ptr, senderData);
4540
4541 SectionHandle handle(this, signal);
4542 SegmentedSectionPtr dictTabInfoPtr;
4543 handle.getSection(dictTabInfoPtr, GetTabInfoConf::DICT_TAB_INFO);
4544 ndbrequire(dictTabInfoPtr.sz == len);
4545
4546 TablePtr tabPtr ;
4547 ndbrequire(findTable(ptr, tabPtr, tableId));
4548
4549 BackupFilePtr filePtr;
4550 ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
4551 FsBuffer & buf = filePtr.p->operation.dataBuffer;
4552 Uint32* dst = 0;
4553 { // Write into ctl file
4554 Uint32 dstLen = len + 3;
4555 if(!buf.getWritePtr(&dst, dstLen)) {
4556 jam();
4557 ndbrequire(false);
4558 ptr.p->setErrorCode(DefineBackupRef::FailedAllocateTableMem);
4559 releaseSections(handle);
4560 defineBackupRef(signal, ptr);
4561 return;
4562 }//if
4563 if(dst != 0) {
4564 jam();
4565
4566 BackupFormat::CtlFile::TableDescription * desc =
4567 (BackupFormat::CtlFile::TableDescription*)dst;
4568 desc->SectionType = htonl(BackupFormat::TABLE_DESCRIPTION);
4569 desc->SectionLength = htonl(len + 3);
4570 desc->TableType = htonl(tableType);
4571 dst += 3;
4572
4573 copy(dst, dictTabInfoPtr);
4574 buf.updateWritePtr(dstLen);
4575 }//if
4576 }
4577
4578 releaseSections(handle);
4579
4580 if(ptr.p->checkError()) {
4581 jam();
4582 defineBackupRef(signal, ptr);
4583 return;
4584 }//if
4585
4586 if (!DictTabInfo::isTable(tabPtr.p->tableType))
4587 {
4588 jam();
4589
4590 TablePtr tmp = tabPtr;
4591 ptr.p->tables.next(tabPtr);
4592 ptr.p->tables.release(tmp);
4593 afterGetTabinfoLockTab(signal, ptr, tabPtr);
4594 return;
4595 }
4596
4597 if (!parseTableDescription(signal, ptr, tabPtr, dst, len))
4598 {
4599 jam();
4600 defineBackupRef(signal, ptr);
4601 return;
4602 }
4603
4604 if(!ptr.p->is_lcp())
4605 {
4606 jam();
4607 BackupLockTab *req = (BackupLockTab *)signal->getDataPtrSend();
4608 req->m_senderRef = reference();
4609 req->m_tableId = tabPtr.p->tableId;
4610 req->m_lock_unlock = BackupLockTab::LOCK_TABLE;
4611 req->m_backup_state = BackupLockTab::GET_TABINFO_CONF;
4612 req->m_backupRecordPtr_I = ptr.i;
4613 req->m_tablePtr_I = tabPtr.i;
4614 sendSignal(DBDICT_REF, GSN_BACKUP_LOCK_TAB_REQ, signal,
4615 BackupLockTab::SignalLength, JBB);
4616 if (ERROR_INSERTED(10038))
4617 {
4618 /* Test */
4619 AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
4620 ord->backupId = ptr.p->backupId;
4621 ord->backupPtr = ptr.i;
4622 ord->requestType = AbortBackupOrd::ClientAbort;
4623 ord->senderData= ptr.p->clientData;
4624 sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal,
4625 AbortBackupOrd::SignalLength, JBB);
4626 }
4627 return;
4628 }
4629
4630 ptr.p->tables.next(tabPtr);
4631 afterGetTabinfoLockTab(signal, ptr, tabPtr);
4632 }
4633
4634 void
afterGetTabinfoLockTab(Signal * signal,BackupRecordPtr ptr,TablePtr tabPtr)4635 Backup::afterGetTabinfoLockTab(Signal *signal,
4636 BackupRecordPtr ptr, TablePtr tabPtr)
4637 {
4638 if(tabPtr.i == RNIL)
4639 {
4640 /**
4641 * Done with all tables...
4642 */
4643 jam();
4644
4645 if(ptr.p->is_lcp())
4646 {
4647 jam();
4648 lcp_open_file_done(signal, ptr);
4649 return;
4650 }
4651
4652 ndbrequire(ptr.p->tables.first(tabPtr));
4653 DihScanTabReq * req = (DihScanTabReq*)signal->getDataPtrSend();
4654 req->senderRef = reference();
4655 req->senderData = ptr.i;
4656 req->tableId = tabPtr.p->tableId;
4657 req->schemaTransId = 0;
4658 sendSignal(DBDIH_REF, GSN_DIH_SCAN_TAB_REQ, signal,
4659 DihScanTabReq::SignalLength, JBB);
4660 return;
4661 }//if
4662
4663 /**
4664 * Fetch next table...
4665 */
4666 signal->theData[0] = BackupContinueB::BUFFER_FULL_META;
4667 signal->theData[1] = ptr.i;
4668 signal->theData[2] = tabPtr.i;
4669 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
4670 return;
4671 }
4672
4673 bool
parseTableDescription(Signal * signal,BackupRecordPtr ptr,TablePtr tabPtr,const Uint32 * tabdescptr,Uint32 len)4674 Backup::parseTableDescription(Signal* signal,
4675 BackupRecordPtr ptr,
4676 TablePtr tabPtr,
4677 const Uint32 * tabdescptr,
4678 Uint32 len)
4679 {
4680 SimplePropertiesLinearReader it(tabdescptr, len);
4681
4682 it.first();
4683
4684 DictTabInfo::Table tmpTab; tmpTab.init();
4685 SimpleProperties::UnpackStatus stat;
4686 stat = SimpleProperties::unpack(it, &tmpTab,
4687 DictTabInfo::TableMapping,
4688 DictTabInfo::TableMappingSize,
4689 true, true);
4690 ndbrequire(stat == SimpleProperties::Break);
4691
4692 bool lcp = ptr.p->is_lcp();
4693
4694 ndbrequire(tabPtr.p->tableId == tmpTab.TableId);
4695 ndbrequire(lcp || (tabPtr.p->tableType == tmpTab.TableType));
4696
4697 /**
4698 * LCP should not save disk attributes but only mem attributes
4699 */
4700
4701 /**
4702 * Initialize table object
4703 */
4704 tabPtr.p->noOfRecords = 0;
4705 tabPtr.p->schemaVersion = tmpTab.TableVersion;
4706 tabPtr.p->triggerIds[0] = ILLEGAL_TRIGGER_ID;
4707 tabPtr.p->triggerIds[1] = ILLEGAL_TRIGGER_ID;
4708 tabPtr.p->triggerIds[2] = ILLEGAL_TRIGGER_ID;
4709 tabPtr.p->triggerAllocated[0] = false;
4710 tabPtr.p->triggerAllocated[1] = false;
4711 tabPtr.p->triggerAllocated[2] = false;
4712
4713 tabPtr.p->noOfAttributes = tmpTab.NoOfAttributes;
4714 tabPtr.p->maxRecordSize = 1; // LEN word
4715 bzero(tabPtr.p->attrInfo, sizeof(tabPtr.p->attrInfo));
4716
4717 if (lcp)
4718 {
4719 jam();
4720 AttributeHeader::init(tabPtr.p->attrInfo, AttributeHeader::READ_LCP, 0);
4721 }
4722 else
4723 {
4724 jam();
4725 AttributeHeader::init(tabPtr.p->attrInfo, AttributeHeader::READ_ALL,
4726 tmpTab.NoOfAttributes);
4727 }
4728
4729 Uint32 varsize = 0;
4730 Uint32 disk = 0;
4731 Uint32 null = 0;
4732 for(Uint32 i = 0; i<tmpTab.NoOfAttributes; i++) {
4733 jam();
4734 DictTabInfo::Attribute tmp; tmp.init();
4735 stat = SimpleProperties::unpack(it, &tmp,
4736 DictTabInfo::AttributeMapping,
4737 DictTabInfo::AttributeMappingSize,
4738 true, true);
4739
4740 ndbrequire(stat == SimpleProperties::Break);
4741 it.next(); // Move Past EndOfAttribute
4742
4743 if(lcp && tmp.AttributeStorageType == NDB_STORAGETYPE_DISK)
4744 {
4745 disk++;
4746 continue;
4747 }
4748
4749 if (tmp.AttributeArrayType != NDB_ARRAYTYPE_FIXED)
4750 varsize++;
4751
4752 if (tmp.AttributeNullableFlag)
4753 null++;
4754
4755 if (tmp.AttributeSize == 0)
4756 {
4757 tabPtr.p->maxRecordSize += (tmp.AttributeArraySize + 31) >> 5;
4758 }
4759 else
4760 {
4761 const Uint32 arr = tmp.AttributeArraySize;
4762 const Uint32 sz = 1 << tmp.AttributeSize;
4763 const Uint32 sz32 = (sz * arr + 31) >> 5;
4764
4765 tabPtr.p->maxRecordSize += sz32;
4766 }
4767 }
4768
4769 tabPtr.p->attrInfoLen = 1;
4770
4771 if (lcp)
4772 {
4773 Dbtup* tup = (Dbtup*)globalData.getBlock(DBTUP, instance());
4774 tabPtr.p->maxRecordSize = 1 + tup->get_max_lcp_record_size(tmpTab.TableId);
4775 }
4776 else
4777 {
4778 // mask
4779 tabPtr.p->maxRecordSize += 1 + ((tmpTab.NoOfAttributes + null + 31) >> 5);
4780 tabPtr.p->maxRecordSize += (2 * varsize + 3) / 4;
4781 }
4782
4783 return true;
4784 }
4785
4786 void
execDIH_SCAN_TAB_CONF(Signal * signal)4787 Backup::execDIH_SCAN_TAB_CONF(Signal* signal)
4788 {
4789 jamEntry();
4790 DihScanTabConf * conf = (DihScanTabConf*)signal->getDataPtr();
4791 const Uint32 fragCount = conf->fragmentCount;
4792 const Uint32 tableId = conf->tableId;
4793 const Uint32 senderData = conf->senderData;
4794 const Uint32 scanCookie = conf->scanCookie;
4795 ndbrequire(conf->reorgFlag == 0); // no backup during table reorg
4796
4797 BackupRecordPtr ptr;
4798 c_backupPool.getPtr(ptr, senderData);
4799
4800 TablePtr tabPtr;
4801 ndbrequire(findTable(ptr, tabPtr, tableId));
4802
4803 tabPtr.p->m_scan_cookie = scanCookie;
4804 ndbrequire(tabPtr.p->fragments.seize(fragCount) != false);
4805 for(Uint32 i = 0; i<fragCount; i++) {
4806 jam();
4807 FragmentPtr fragPtr;
4808 tabPtr.p->fragments.getPtr(fragPtr, i);
4809 fragPtr.p->scanned = 0;
4810 fragPtr.p->scanning = 0;
4811 fragPtr.p->tableId = tableId;
4812 fragPtr.p->fragmentId = i;
4813 fragPtr.p->lqhInstanceKey = 0;
4814 fragPtr.p->node = 0;
4815 }//for
4816
4817 /**
4818 * Next table
4819 */
4820 if(ptr.p->tables.next(tabPtr)) {
4821 jam();
4822 DihScanTabReq * req = (DihScanTabReq*)signal->getDataPtrSend();
4823 req->senderRef = reference();
4824 req->senderData = ptr.i;
4825 req->tableId = tabPtr.p->tableId;
4826 req->schemaTransId = 0;
4827 sendSignal(DBDIH_REF, GSN_DIH_SCAN_TAB_REQ, signal,
4828 DihScanTabReq::SignalLength, JBB);
4829 return;
4830 }//if
4831
4832 ptr.p->tables.first(tabPtr);
4833 getFragmentInfo(signal, ptr, tabPtr, 0);
4834 }
4835
4836 void
getFragmentInfo(Signal * signal,BackupRecordPtr ptr,TablePtr tabPtr,Uint32 fragNo)4837 Backup::getFragmentInfo(Signal* signal,
4838 BackupRecordPtr ptr, TablePtr tabPtr, Uint32 fragNo)
4839 {
4840 jam();
4841
4842 for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) {
4843 jam();
4844 const Uint32 fragCount = tabPtr.p->fragments.getSize();
4845 for(; fragNo < fragCount; fragNo ++) {
4846 jam();
4847 FragmentPtr fragPtr;
4848 tabPtr.p->fragments.getPtr(fragPtr, fragNo);
4849
4850 if(fragPtr.p->scanned == 0 && fragPtr.p->scanning == 0) {
4851 jam();
4852 DihScanGetNodesReq* req = (DihScanGetNodesReq*)signal->getDataPtrSend();
4853 req->senderRef = reference();
4854 req->tableId = tabPtr.p->tableId;
4855 req->scanCookie = tabPtr.p->m_scan_cookie;
4856 req->fragCnt = 1;
4857 req->fragItem[0].senderData = ptr.i;
4858 req->fragItem[0].fragId = fragNo;
4859 sendSignal(DBDIH_REF, GSN_DIH_SCAN_GET_NODES_REQ, signal,
4860 DihScanGetNodesReq::FixedSignalLength
4861 + DihScanGetNodesReq::FragItem::Length,
4862 JBB);
4863 return;
4864 }//if
4865 }//for
4866
4867 DihScanTabCompleteRep*rep= (DihScanTabCompleteRep*)signal->getDataPtrSend();
4868 rep->tableId = tabPtr.p->tableId;
4869 rep->scanCookie = tabPtr.p->m_scan_cookie;
4870 sendSignal(DBDIH_REF, GSN_DIH_SCAN_TAB_COMPLETE_REP, signal,
4871 DihScanTabCompleteRep::SignalLength, JBB);
4872
4873 fragNo = 0;
4874 }//for
4875
4876
4877 getFragmentInfoDone(signal, ptr);
4878 }
4879
4880 void
execDIH_SCAN_GET_NODES_CONF(Signal * signal)4881 Backup::execDIH_SCAN_GET_NODES_CONF(Signal* signal)
4882 {
4883 jamEntry();
4884
4885 /**
4886 * Assume only short CONFs with a single FragItem as we only do single
4887 * fragment requests in DIH_SCAN_GET_NODES_REQ from Backup::getFragmentInfo.
4888 */
4889 ndbrequire(signal->getNoOfSections() == 0);
4890 ndbassert(signal->getLength() ==
4891 DihScanGetNodesConf::FixedSignalLength
4892 + DihScanGetNodesConf::FragItem::Length);
4893
4894 DihScanGetNodesConf* conf = (DihScanGetNodesConf*)signal->getDataPtrSend();
4895 const Uint32 tableId = conf->tableId;
4896 const Uint32 senderData = conf->fragItem[0].senderData;
4897 const Uint32 nodeCount = conf->fragItem[0].count;
4898 const Uint32 fragNo = conf->fragItem[0].fragId;
4899 const Uint32 instanceKey = conf->fragItem[0].instanceKey;
4900
4901 ndbrequire(nodeCount > 0 && nodeCount <= MAX_REPLICAS);
4902
4903 BackupRecordPtr ptr;
4904 c_backupPool.getPtr(ptr, senderData);
4905
4906 TablePtr tabPtr;
4907 ndbrequire(findTable(ptr, tabPtr, tableId));
4908
4909 FragmentPtr fragPtr;
4910 tabPtr.p->fragments.getPtr(fragPtr, fragNo);
4911 fragPtr.p->lqhInstanceKey = instanceKey;
4912
4913 fragPtr.p->node = conf->fragItem[0].nodes[0];
4914
4915 getFragmentInfo(signal, ptr, tabPtr, fragNo + 1);
4916 }
4917
4918 void
getFragmentInfoDone(Signal * signal,BackupRecordPtr ptr)4919 Backup::getFragmentInfoDone(Signal* signal, BackupRecordPtr ptr)
4920 {
4921 ptr.p->m_gsn = GSN_DEFINE_BACKUP_CONF;
4922 ptr.p->slaveState.setState(DEFINED);
4923 DefineBackupConf * conf = (DefineBackupConf*)signal->getDataPtr();
4924 conf->backupPtr = ptr.i;
4925 conf->backupId = ptr.p->backupId;
4926 sendSignal(ptr.p->masterRef, GSN_DEFINE_BACKUP_CONF, signal,
4927 DefineBackupConf::SignalLength, JBB);
4928 }
4929
4930
4931 /*****************************************************************************
4932 *
 * Slave functionality: Start backup
4934 *
4935 *****************************************************************************/
4936 void
execSTART_BACKUP_REQ(Signal * signal)4937 Backup::execSTART_BACKUP_REQ(Signal* signal)
4938 {
4939 jamEntry();
4940
4941 CRASH_INSERTION((10015));
4942
4943 StartBackupReq* req = (StartBackupReq*)signal->getDataPtr();
4944 const Uint32 ptrI = req->backupPtr;
4945
4946 BackupRecordPtr ptr;
4947 c_backupPool.getPtr(ptr, ptrI);
4948
4949 ptr.p->slaveState.setState(STARTED);
4950 ptr.p->m_gsn = GSN_START_BACKUP_REQ;
4951
4952 /**
4953 * Start file threads...
4954 */
4955 BackupFilePtr filePtr;
4956 for(ptr.p->files.first(filePtr); filePtr.i!=RNIL;ptr.p->files.next(filePtr))
4957 {
4958 jam();
4959 if(! (filePtr.p->m_flags & BackupFile::BF_FILE_THREAD))
4960 {
4961 jam();
4962 filePtr.p->m_flags |= BackupFile::BF_FILE_THREAD;
4963 signal->theData[0] = BackupContinueB::START_FILE_THREAD;
4964 signal->theData[1] = filePtr.i;
4965 sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
4966 }//if
4967 }//for
4968
4969 /**
4970 * Tell DBTUP to create triggers
4971 */
4972 TablePtr tabPtr;
4973 ndbrequire(ptr.p->tables.first(tabPtr));
4974 sendCreateTrig(signal, ptr, tabPtr);
4975 }
4976
4977 /*****************************************************************************
4978 *
 * Slave functionality: Backup fragment
4980 *
4981 *****************************************************************************/
4982 void
execBACKUP_FRAGMENT_REQ(Signal * signal)4983 Backup::execBACKUP_FRAGMENT_REQ(Signal* signal)
4984 {
4985 jamEntry();
4986 BackupFragmentReq* req = (BackupFragmentReq*)signal->getDataPtr();
4987
4988 CRASH_INSERTION((10016));
4989
4990 const Uint32 ptrI = req->backupPtr;
4991 //const Uint32 backupId = req->backupId;
4992 const Uint32 tableId = req->tableId;
4993 const Uint32 fragNo = req->fragmentNo;
4994 const Uint32 count = req->count;
4995
4996 /**
4997 * Get backup record
4998 */
4999 BackupRecordPtr ptr;
5000 c_backupPool.getPtr(ptr, ptrI);
5001
5002 ptr.p->slaveState.setState(SCANNING);
5003 ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REQ;
5004
5005 /**
5006 * Get file
5007 */
5008 BackupFilePtr filePtr;
5009 c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
5010
5011 ndbrequire(filePtr.p->backupPtr == ptrI);
5012 ndbrequire(filePtr.p->m_flags ==
5013 (BackupFile::BF_OPEN | BackupFile::BF_FILE_THREAD));
5014
5015 /**
5016 * Get table
5017 */
5018 TablePtr tabPtr;
5019 ndbrequire(findTable(ptr, tabPtr, tableId));
5020
5021 /**
5022 * Get fragment
5023 */
5024 FragmentPtr fragPtr;
5025 tabPtr.p->fragments.getPtr(fragPtr, fragNo);
5026
5027 ndbrequire(fragPtr.p->scanned == 0);
5028 ndbrequire(fragPtr.p->scanning == 0 ||
5029 refToNode(ptr.p->masterRef) == getOwnNodeId());
5030
5031 /**
5032 * Init operation
5033 */
5034 if(filePtr.p->tableId != tableId) {
5035 jam();
5036 filePtr.p->operation.init(tabPtr);
5037 filePtr.p->tableId = tableId;
5038 }//if
5039
5040 /**
5041 * Check for space in buffer
5042 */
5043 if(!filePtr.p->operation.newFragment(tableId, fragPtr.p->fragmentId)) {
5044 jam();
5045 req->count = count + 1;
5046 sendSignalWithDelay(reference(), GSN_BACKUP_FRAGMENT_REQ, signal,
5047 WaitDiskBufferCapacityMillis,
5048 signal->length());
5049 ptr.p->slaveState.setState(STARTED);
5050 return;
5051 }//if
5052
5053 /**
5054 * Mark things as "in use"
5055 */
5056 fragPtr.p->scanning = 1;
5057 filePtr.p->fragmentNo = fragPtr.p->fragmentId;
5058 filePtr.p->m_retry_count = 0;
5059
5060 if (ptr.p->is_lcp())
5061 {
5062 jam();
5063 filePtr.p->fragmentNo = 0;
5064 }
5065
5066 sendScanFragReq(signal, ptr, filePtr, tabPtr, fragPtr, 0);
5067 }
5068
5069 /**
5070 * Backups and LCPs are actions that operate on a long time-scale compared to
5071 * other activities in the cluster. We also have a number of similar
5072 * activities that operate on a longer time scale. These operations have to
5073 * continue to operate at some decent level even if user transactions are
5074 * arriving at extreme rates.
5075 *
5076 * Not providing sufficient activity for LCPs might mean that we run out of
5077 * REDO log, this means that no writing user transactions are allowed until
5078 * we have completed an LCP. Clearly this is not a desirable user experience.
5079 * So we need to find a balance between long-term needs and short-term needs
5080 * in scheduling LCPs and Backups versus normal user transactions.
5081 *
5082 * When designing those scheduling algorithms we need to remember the design
5083 * aim for the NDB storage engine. We want to ensure that NDB can be used in
5084 * soft real-time applications such as financial applications, telecom
 * applications. We do not aim for hard real-time applications such as
 * controlling power plants where missing a deadline can lead to major
 * catastrophes.
5088 *
5089 * Using NDB for a soft real-time application can still be done at different
5090 * levels of real-time requirements. If the aim is to provide that more or
5091 * less 100% of the transactions complete in say 100 microseconds then a
5092 * certain level of control is needed also from the application.
5093 *
5094 * Things that will affect scheduling in NDB are:
5095 * 1) Use of large rows
5096 * NDB will schedule at least one row at a time. There are currently very
5097 * few places where execution of one row operation contains breaks for
5098 * scheduling. Executing a row operation on the maximum row size of
5099 * around 14 kBytes means that signals can execute for up to about 20
5100 * microseconds as of 2015. Clearly using smaller rows can give a better
5101 * response time experience.
5102 *
5103 * 2) Using complex conditions per row
5104 * NDB supports pushing down conditions on rows in both key operations and
5105 * scan operations and even on join operations. Clearly if these pushed
5106 * conditions are very complex the time to execute them per row can extend
 *    the time spent in executing one particular signal. Normal conditions
 *    involving one or a number of columns don't present a problem, but
 *    SQL has no specific limits on conditions, so extremely complex
 *    conditions are possible to construct.
5111 *
5112 * 3) Metadata operations
5113 * Creating tables, indexes can contain some operations that take a bit
5114 * longer to execute. However using the multi-threaded data nodes (ndbmtd)
5115 * means that most of these signals are executed in threads that are not
5116 * used for normal user transactions. So using ndbmtd is here a method to
5117 * decrease impact of response time of metadata operations.
5118 *
5119 * 4) Use of ndbd vs ndbmtd
5120 * ndbd is a single threaded data node, ndbd does receive data, operate on
5121 * the data and send the data all in one thread. In low load cases with
5122 * very high requirements on response time and strict control of the
5123 * application layer the use of ndbd for real-time operation can be
5124 * beneficial.
5125 *
5126 * Important here is to understand that the single-threaded nature of ndbd
5127 * means that it is limited in throughput. One data node using ndbd is
5128 * limited to handling on the order of 100.000 row operations per second
5129 * with maintained responsiveness as of 2015. ndbmtd can achieve a few
5130 * million row operations in very large configurations with maintained
5131 * responsiveness.
5132 *
5133 * When looking at maintaining a balance between various operations long-term
5134 * it is important to consider what types of operations that can go in parallel
5135 * in an NDB data node. These are the activities currently possible.
5136 *
5137 * 1) Normal user transactions
5138 * These consist of primary key row operations, unique key row operations
5139 * (these are implemented as two primary key row operations), scan operations
5140 * and finally a bit more complex operations that can have both key
5141 * operations and scan operations as part of them. The last category is
5142 * created as part of executing SPJ operation trees that currently is used
5143 * for executing complex SQL queries.
5144 *
5145 * 2) Local checkpoints (LCPs)
 * These can operate continuously without user interaction. The LCPs are
 * needed to ensure that we can cut the REDO log. If LCPs execute too slowly
 * then we won't have sufficient REDO log to store all user transactions that
 * are writing on logging tables.
5150 *
5151 * 3) Backups
5152 * These are started by a user, only one backup at a time is allowed. These
5153 * can be stored offsite and used by the user to restore NDB to a former
5154 * state, either as an emergency fix, it can also be used to start up a
5155 * new cluster or as part of setting up a slave cluster. A backup consists
5156 * of a data file per data node and one log file of changes since the backup
5157 * started and a control file. It is important that the backup maintains a
5158 * level of speed such that the system doesn't run out of disk space for the
5159 * log file.
5160 *
5161 * 4) Metadata operations
5162 * There are many different types of metadata operations. One can define
5163 * new tables, indexes, foreign keys, tablespaces. One can also rearrange
5164 * the tables for a new number of nodes as part of adding nodes to the
5165 * cluster. There are also operations to analyse tables, optimise tables
5166 * and so forth. Most of these are fairly short in duration and usage of
5167 * resources. But there are a few of them such as rearranging tables for
5168 * a new set of nodes that require shuffling data around in the cluster.
5169 * This can be a fairly long-running operation.
5170 *
5171 * 5) Event operations
5172 * To support replication from one MySQL Cluster to another MySQL Cluster
5173 * or a different MySQL storage engine we use event operations.
5174 * These operate always as part of the normal user transactions, so they
5175 * do not constitute anything to consider in the balance between long-term
5176 * and short-term needs. In addition in ndbmtd much of the processing happens
5177 * in a special thread for event operations.
5178 *
5179 * 6) Node synchronisation during node recovery
5180 * Recovery as such normally happens when no user transactions are happening
5181 * so thus have no special requirements on maintaining a balance between
5182 * short-term needs and long-term needs since recovery is always a long-term
5183 * operation that has no competing short-term operations. There is however
5184 * one exception to this and this is during node recovery when the starting
5185 * node needs to synchronize its data with a live node. In this case the
5186 * starting node has recovered an old version of the data node using LCPs
 * and REDO logs and has rebuilt the indexes. At this point it needs to
5188 * synchronize the data in each table with a live node within the same node
5189 * group.
5190 *
 * This synchronization happens row by row controlled by the live node. The
 * live node scans its own data and checks each row against the global
 * checkpoint id (GCI) that the starting node has restored. If the row has
 * been updated with a more recent GCI then the row needs to be sent over to
 * the starting node.
5196 *
5197 * Only one node recovery per node group at a time is possible when using
5198 * two replicas.
5199 *
5200 * So there can be as many as 4 long-term operations running in parallel to
5201 * the user transactions. These are 1 LCP scan, 1 Backup scan, 1 node recovery
5202 * scan and finally 1 metadata scan. All of these long-running operations
5203 * perform scans of table partitions (fragments). LCPs scan a partition and
5204 * write rows into a LCP file. Backups scan a partition and write its result
 * into a backup file. Node recovery scans search for rows that have been
5206 * updated since the GCI recovered in the starting node and for each row
5207 * found it is sent over to the starting node. Metadata scans for either
5208 * all rows or using some condition and then can use this information to
5209 * send the row to another node, to build an index, to build a foreign key
5210 * index or other online operation which is performed in parallel to user
5211 * transactions.
5212 *
5213 * From this analysis it's clear that we don't want any long-running operation
5214 * to consume any major part of the resources. It's desirable that user
5215 * transactions can use at least about half of the resources even when running
5216 * in parallel with all four of those activities. Node recovery is slightly
5217 * more important than the other activities, this means that our aim should
5218 * be to ensure that LCPs, Backups and metadata operations can at least use
5219 * about 10% of the CPU resources and that node recovery operations can use
5220 * at least about 20% of the CPU resources. Obviously they should be able to
5221 * use more resources when there is less user transactions competing for the
5222 * resources. But we should try to maintain this level of CPU usage for LCPs
5223 * and Backups even when the user load is at extreme levels.
5224 *
5225 * There is no absolute way of ensuring 10% CPU usage for a certain activity.
5226 * We use a number of magic numbers controlling the algorithms to ensure this.
5227 *
5228 * At first we use the coding rule that one signal should never execute for
5229 * more than 10 microseconds in the normal case. There are exceptions to this
5230 * rule as explained above, but it should be outliers that won't affect the
5231 * long-term rates very much.
5232 *
5233 * Second we use the scheduling classes we have access to. The first is B-level
5234 * signals, these can have an arbitrary long queue of other jobs waiting before
5235 * they are executed, so these have no bound on when they execute. We also
5236 * have special signals that execute with a bounded delay, in one signal they
5237 * can be delayed more than a B-level signal, but the scheduler ensures that
5238 * at most 100 B-level signals execute before they are executed. Normally it
5239 * would even operate with at most 75 B-level signals executed even in high
5240 * load scenarios and mostly even better than that. We achieve this by calling
5241 * sendSignalWithDelay with timeout BOUNDED_DELAY.
5242 *
5243 * So how fast can an LCP run that is using about 10% of the CPU. In a fairly
5244 * standard CPU of 2015, not a high-end, but also not at the very low-end,
5245 * the CPU can produce about 150 MBytes of data for LCPs per second. This is
5246 * using 100 byte rows. So this constitutes about 1.5M rows per second plus
5247 * transporting 150 MBytes of data to the write buffers in the Backup block.
5248 * So we use a formula here where we assume that the fixed cost of scanning
 * a row is about 550 ns and cost per word of data is 4 ns. The reason we
 * use a different formula for LCP scans compared to the formula we assume in
5251 * DBLQH for generic scans is that the copy of data is per row for LCPs
5252 * whereas it is per column for generic scans. Similarly we never use any
5253 * scan filters for LCPs, we only check for LCP_SKIP bits and FREE bits.
5254 * This is much more efficient compared to generic scan filters.
5255 *
5256 * At very high load we will assume that we have to wait about 50 signals
5257 * when sending BOUNDED_DELAY signals. Worst case can be up to about 100
5258 * signals, but the worst case won't happen very often and more common
5259 * will be much less than that.
 * The mean execution time of signals is about 5 microseconds. This means
5261 * that by constantly using bounded delay signals we ensure that we get at
5262 * least around 4000 executions per second. So this means that
5263 * in extreme overload situations we can allow for execution to go on
5264 * for up to about 25 microseconds without giving B-level signals access.
5265 * 25 microseconds times 4000 is 100 milliseconds so about 10% of the
5266 * CPU usage.
5267 *
5268 * LCPs and Backups also operate using conditions on how fast they can write
5269 * to the disk subsystem. The user can configure these numbers, the LCPs
5270 * and Backups gets a quota per 100 millisecond. So if the LCPs and Backups
5271 * runs too fast they will pause a part of those 100 milliseconds. However
5272 * it is a good idea to set the minimum disk write speed to at least 20%
5273 * of the possible CPU speed. So this means setting it to 30 MByte per
5274 * second. In high-load scenarios we might not be able to process more
5275 * than 15 MByte per second, but as soon as user load and other load
5276 * goes down we will get back to the higher write speed.
5277 *
5278 * Scans operate in the following fashion which is an important input to
5279 * the construction of the magic numbers. We start a scan with SCAN_FRAGREQ
5280 * and here we don't really know the row sizes other than the maximum row
5281 * size. This SCAN_FRAGREQ will return 16 rows and then it will return
5282 * SCAN_FRAGCONF. For each row it will return a TRANSID_AI signal.
5283 * If we haven't used our quota for writing LCPs and Backups AND there is
5284 * still room in the backup write buffer then we will continue with another
5285 * set of 16 rows. These will be retrieved using the SCAN_NEXTREQ signal
5286 * and the response to this signal will be SCAN_FRAGCONF when done with the
5287 * 16 rows (or all rows scanned).
5288 *
5289 * Processing 16 rows takes about 8800 ns on standard HW of 2015 and so even
5290 * for minimal rows we will use at least 10000 ns if we execute an entire batch
5291 * of 16 rows without providing access for other B-level signals. So the
5292 * absolute maximum amount of rows that we will ever execute without
5293 * giving access for B-level signals are 32 rows so that we don't go beyond
5294 * the allowed quota of 25 microsecond without giving B-level priority signal
5295 * access, this means two SCAN_FRAGREQ/SCAN_NEXTREQ executions.
5296 *
5297 * Using the formula we derive that we should never start another set of
5298 * 16 rows if we have passed 1500 words in the previous batch of 16 rows.
5299 * Even when deciding in the Backup block to send an entire batch of 16
5300 * rows at A-level we will never allow to continue gathering when we have
5301 * already gathered more than 4000 words. When we reach this limit we will
5302 * send another bounded delay signal. The reason is that we've already
5303 * reached sufficient CPU usage and going further would go beyond 15%.
5304 *
5305 * The boundary 1500 and 4000 is actually based on using 15% of the CPU
5306 * resources which is better if not all four activities happen at the
5307 * same time. When we support rate control on all activities we need to
5308 * adaptively decrease this limit to ensure that the total rate controlled
5309 * efforts doesn't go beyond 50%.
5310 *
 * The limit 4000 is ZMAX_WORDS_PER_SCAN_BATCH_HIGH_PRIO set in DblqhMain.cpp.
 * This constant limits the impact of wide rows on responsiveness.
 *
 * The limit 1500 is MAX_LCP_WORDS_PER_BATCH set in this block.
 * This constant limits the impact of row writes on LCP writes.
5316 *
5317 * When operating in normal mode, we will not continue gathering when we
5318 * already gathered at least 500 words. However we will only operate in
5319 * this mode when we are in low load scenario in which case this speed will
5320 * be quite sufficient. This limit is to ensure that we don't go beyond
5321 * normal real-time break limits in normal operations. This limits LCP
5322 * execution during normal load to around 3-4 microseconds.
5323 *
5324 * In the following paragraph a high priority of LCPs means that we need to
5325 * raise LCP priority to maintain LCP write rate at the expense of user
5326 * traffic responsiveness. Low priority means that we can get sufficient
5327 * LCP write rates even with normal responsiveness to user requests.
5328 *
5329 * Finally we have to make a decision when we should execute at high priority
5330 * and when operating at normal priority. Obviously we should avoid entering
5331 * high priority mode as much as possible since it will affect response times.
5332 * At the same time once we have entered this mode we need to have some
5333 * memory of it. The reason is that we will have lost some ground while
5334 * executing at normal priority when the job buffers were long. We will limit
5335 * the memory to at most 16 executions of 16 rows at high priority. Each
5336 * time we start a new execution we will see if we need to add to this
5337 * "memory". We will add one per 48 signals that we had to wait for between
5338 * executing a set of 16 rows (normally this means execution of 3 bounded
 * delay signals). When the load level is even higher then we will add to
5340 * the memory such that we operate in high priority mode a bit longer since
5341 * we are likely to have missed a bit more opportunity to perform LCP scans
5342 * in this overload situation.
5343 *
5344 * The following "magic" constants control these algorithms:
5345 * 1) ZMAX_SCAN_DIRECT_COUNT set to 5
5346 * Means that at most 6 rows will be scanned per execute direct, set in
5347 * Dblqh.hpp. This applies to all scan types, not only to LCP scans.
5348 *
5349 * 2) ZMAX_WORDS_PER_SCAN_BATCH_LOW_PRIO set to 500
5350 * This controls the maximum number of words that is allowed to be gathered
5351 * before we decide to do a real-time break when executing at normal
5352 * priority level. This is defined in DblqhMain.cpp
5353 *
5354 * 3) ZMAX_WORDS_PER_SCAN_BATCH_HIGH_PRIO set to 4000
5355 * This controls the maximum words gathered before we decide to send the
5356 * next row to be scanned in another bounded delay signal. This is defined in
5357 * DblqhMain.cpp
5358 *
5359 * 4) MAX_LCP_WORDS_PER_BATCH set to 1500
5360 * This defines the maximum size gathered at A-level to allow for execution
5361 * of one more batch at A-level. This is defined here in Backup.cpp.
5362 *
5363 * 5) HIGH_LOAD_LEVEL set to 32
 * Limit of how many signals have been executed in this LDM thread since
 * starting last 16 rows in order to enter high priority mode.
5366 * Defined in this block Backup.cpp.
5367 *
5368 * 6) VERY_HIGH_LOAD_LEVEL set to 48
5369 * For each additional of this we increase the memory. So e.g. with 80 signals
5370 * executed since last we will increase the memory by two, with 128 we will
5371 * increase it by three. Thus if #signals >= (32 + 48) => 2, #signals >=
5372 * (32 + 48 * 2) => 3 and so forth. Memory here means that we will remember
5373 * the high load until we have compensated for it in a sufficient manner, so
5374 * we will retain executing on high priority for a bit longer to compensate
5375 * for what we lost during execution at low priority when load suddenly
5376 * increased.
5377 * Defined in this block Backup.cpp.
5378 *
5379 * 7) MAX_RAISE_PRIO_MEMORY set to 16
 * Max memory of priority raising, so after load disappears we will at most
 * execute an additional set of 16*16 rows at high priority mode before going
 * back to normal priority mode.
5383 * Defined in this block Backup.cpp.
5384 *
5385 * 8) NUMBER_OF_SIGNALS_PER_SCAN_BATCH set to 3
5386 * When starting up the algorithm we check how many signals are in the
5387 * B-level job buffer. Based on this number we set the initial value to
5388 * high priority or not. This is based on that we expect a set of 16
5389 * rows to be executed in 3 signals with 6 rows, 6 rows and last signal
5390 * 4 rows.
5391 * Defined in this block Backup.cpp.
5392 */
5393
5394 /**
5395 * These routines are more or less our scheduling logic for LCPs. This is
5396 * how we try to achieve a balanced output from LCPs while still
5397 * processing normal transactions at a high rate.
5398 */
init_scan_prio_level(Signal * signal,BackupRecordPtr ptr)5399 void Backup::init_scan_prio_level(Signal *signal, BackupRecordPtr ptr)
5400 {
5401 Uint32 level = getSignalsInJBB();
5402 if ((level * NUMBER_OF_SIGNALS_PER_SCAN_BATCH) > HIGH_LOAD_LEVEL)
5403 {
5404 /* Ensure we use prio A and only 1 signal at prio A */
5405 jam();
5406 level = VERY_HIGH_LOAD_LEVEL;
5407 }
5408 ptr.p->m_lastSignalId = signal->getSignalId() - level;
5409 ptr.p->m_prioA_scan_batches_to_execute = 0;
5410 }
5411
5412 bool
check_scan_if_raise_prio(Signal * signal,BackupRecordPtr ptr)5413 Backup::check_scan_if_raise_prio(Signal *signal, BackupRecordPtr ptr)
5414 {
5415 bool flag = false;
5416 const Uint32 current_signal_id = signal->getSignalId();
5417 const Uint32 lastSignalId = ptr.p->m_lastSignalId;
5418 Uint32 prioA_scan_batches_to_execute =
5419 ptr.p->m_prioA_scan_batches_to_execute;
5420 const Uint32 num_signals_executed = current_signal_id - lastSignalId;
5421
5422 if (num_signals_executed > HIGH_LOAD_LEVEL)
5423 {
5424 jam();
5425 prioA_scan_batches_to_execute+=
5426 ((num_signals_executed + (VERY_HIGH_LOAD_LEVEL - 1)) /
5427 VERY_HIGH_LOAD_LEVEL);
5428 if (prioA_scan_batches_to_execute > MAX_RAISE_PRIO_MEMORY)
5429 {
5430 jam();
5431 prioA_scan_batches_to_execute = MAX_RAISE_PRIO_MEMORY;
5432 }
5433 }
5434 if (prioA_scan_batches_to_execute > 0)
5435 {
5436 jam();
5437 prioA_scan_batches_to_execute--;
5438 flag = true;
5439 }
5440 ptr.p->m_lastSignalId = current_signal_id;
5441 ptr.p->m_prioA_scan_batches_to_execute = prioA_scan_batches_to_execute;
5442 return flag;;
5443 }
5444
5445 void
sendScanFragReq(Signal * signal,Ptr<BackupRecord> ptr,Ptr<BackupFile> filePtr,Ptr<Table> tabPtr,Ptr<Fragment> fragPtr,Uint32 delay)5446 Backup::sendScanFragReq(Signal* signal,
5447 Ptr<BackupRecord> ptr,
5448 Ptr<BackupFile> filePtr,
5449 Ptr<Table> tabPtr,
5450 Ptr<Fragment> fragPtr,
5451 Uint32 delay)
5452 {
5453 /**
5454 * Start scan
5455 */
5456 {
5457 filePtr.p->m_flags |= BackupFile::BF_SCAN_THREAD;
5458
5459 Table & table = * tabPtr.p;
5460 ScanFragReq * req = (ScanFragReq *)signal->getDataPtrSend();
5461 const Uint32 parallelism = ZRESERVED_SCAN_BATCH_SIZE;
5462
5463 req->senderData = filePtr.i;
5464 req->resultRef = reference();
5465 req->schemaVersion = table.schemaVersion;
5466 req->fragmentNoKeyLen = fragPtr.p->fragmentId;
5467 req->requestInfo = 0;
5468 req->savePointId = 0;
5469 req->tableId = table.tableId;
5470 ScanFragReq::setReadCommittedFlag(req->requestInfo, 1);
5471 ScanFragReq::setLockMode(req->requestInfo, 0);
5472 ScanFragReq::setHoldLockFlag(req->requestInfo, 0);
5473 ScanFragReq::setKeyinfoFlag(req->requestInfo, 0);
5474 ScanFragReq::setTupScanFlag(req->requestInfo, 1);
5475 ScanFragReq::setNotInterpretedFlag(req->requestInfo, 1);
5476 if (ptr.p->is_lcp())
5477 {
5478 ScanFragReq::setScanPrio(req->requestInfo, 1);
5479 ScanFragReq::setNoDiskFlag(req->requestInfo, 1);
5480 ScanFragReq::setLcpScanFlag(req->requestInfo, 1);
5481 }
5482 filePtr.p->m_sent_words_in_scan_batch = 0;
5483 filePtr.p->m_num_scan_req_on_prioa = 0;
5484 init_scan_prio_level(signal, ptr);
5485 if (check_scan_if_raise_prio(signal, ptr))
5486 {
5487 jam();
5488 ScanFragReq::setPrioAFlag(req->requestInfo, 1);
5489 filePtr.p->m_num_scan_req_on_prioa = 1;
5490 }
5491
5492 req->transId1 = 0;
5493 req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
5494 req->clientOpPtr= filePtr.i;
5495 req->batch_size_rows= parallelism;
5496 req->batch_size_bytes= 0;
5497 BlockReference lqhRef = 0;
5498 bool delay_possible = true;
5499 if (ptr.p->is_lcp()) {
5500 lqhRef = calcInstanceBlockRef(DBLQH);
5501 } else {
5502 const Uint32 instanceKey = fragPtr.p->lqhInstanceKey;
5503 ndbrequire(instanceKey != 0);
5504 lqhRef = numberToRef(DBLQH, instanceKey, getOwnNodeId());
5505 if (lqhRef != calcInstanceBlockRef(DBLQH))
5506 {
5507 /* We can't send delayed signals to other threads. */
5508 delay_possible = false;
5509 }
5510 }
5511
5512 Uint32 attrInfo[25];
5513 memcpy(attrInfo, table.attrInfo, 4*table.attrInfoLen);
5514 LinearSectionPtr ptr[3];
5515 ptr[0].p = attrInfo;
5516 ptr[0].sz = table.attrInfoLen;
5517 if (delay_possible)
5518 {
5519 SectionHandle handle(this);
5520 ndbrequire(import(handle.m_ptr[0], ptr[0].p, ptr[0].sz));
5521 handle.m_cnt = 1;
5522 if (delay == 0)
5523 {
5524 jam();
5525 sendSignalWithDelay(lqhRef, GSN_SCAN_FRAGREQ, signal,
5526 BOUNDED_DELAY, ScanFragReq::SignalLength, &handle);
5527 }
5528 else
5529 {
5530 jam();
5531 sendSignalWithDelay(lqhRef, GSN_SCAN_FRAGREQ, signal,
5532 delay, ScanFragReq::SignalLength, &handle);
5533 }
5534 }
5535 else
5536 {
5537 /**
5538 * There is no way to send signals over to another thread at a rate
5539 * level at the moment. So we send at priority B, but the response
5540 * back to us will arrive at Priority A if necessary.
5541 */
5542 jam();
5543 sendSignal(lqhRef,
5544 GSN_SCAN_FRAGREQ,
5545 signal,
5546 ScanFragReq::SignalLength,
5547 JBB,
5548 ptr,
5549 1);
5550 }
5551 }
5552 }
5553
5554 void
execSCAN_HBREP(Signal * signal)5555 Backup::execSCAN_HBREP(Signal* signal)
5556 {
5557 jamEntry();
5558 }
5559
5560 void
execTRANSID_AI(Signal * signal)5561 Backup::execTRANSID_AI(Signal* signal)
5562 {
5563 jamEntry();
5564
5565 const Uint32 filePtrI = signal->theData[0];
5566 //const Uint32 transId1 = signal->theData[1];
5567 //const Uint32 transId2 = signal->theData[2];
5568 Uint32 dataLen = signal->length() - 3;
5569
5570 BackupFilePtr filePtr;
5571 c_backupFilePool.getPtr(filePtr, filePtrI);
5572
5573 OperationRecord & op = filePtr.p->operation;
5574
5575 /**
5576 * Unpack data
5577 */
5578 Uint32 * dst = op.dst;
5579 if (signal->getNoOfSections() == 0)
5580 {
5581 jam();
5582 const Uint32 * src = &signal->theData[3];
5583 * dst = htonl(dataLen);
5584 memcpy(dst + 1, src, 4*dataLen);
5585 }
5586 else
5587 {
5588 jam();
5589 SectionHandle handle(this, signal);
5590 SegmentedSectionPtr dataPtr;
5591 handle.getSection(dataPtr, 0);
5592 dataLen = dataPtr.sz;
5593
5594 * dst = htonl(dataLen);
5595 copy(dst + 1, dataPtr);
5596 releaseSections(handle);
5597 }
5598
5599 op.attrSzTotal += dataLen;
5600 ndbrequire(dataLen < op.maxRecordSize);
5601
5602 filePtr.p->m_sent_words_in_scan_batch += dataLen;
5603
5604 op.finished(dataLen);
5605
5606 op.newRecord(dst + dataLen + 1);
5607 }
5608
5609 void
update_lcp_pages_scanned(Signal * signal,Uint32 filePtrI,Uint32 scanned_pages)5610 Backup::update_lcp_pages_scanned(Signal *signal,
5611 Uint32 filePtrI,
5612 Uint32 scanned_pages)
5613 {
5614 BackupFilePtr filePtr;
5615 jamEntry();
5616
5617 c_backupFilePool.getPtr(filePtr, filePtrI);
5618
5619 OperationRecord & op = filePtr.p->operation;
5620
5621 op.set_scanned_pages(scanned_pages);
5622 }
5623
5624 void
init(const TablePtr & ptr)5625 Backup::OperationRecord::init(const TablePtr & ptr)
5626 {
5627 tablePtr = ptr.i;
5628 maxRecordSize = ptr.p->maxRecordSize;
5629 lcpScannedPages = 0;
5630 }
5631
5632 bool
newFragment(Uint32 tableId,Uint32 fragNo)5633 Backup::OperationRecord::newFragment(Uint32 tableId, Uint32 fragNo)
5634 {
5635 Uint32 * tmp;
5636 const Uint32 headSz = (sizeof(BackupFormat::DataFile::FragmentHeader) >> 2);
5637 const Uint32 sz = headSz + ZRESERVED_SCAN_BATCH_SIZE * maxRecordSize;
5638
5639 ndbrequire(sz < dataBuffer.getMaxWrite());
5640 if(dataBuffer.getWritePtr(&tmp, sz)) {
5641 jam();
5642 BackupFormat::DataFile::FragmentHeader * head =
5643 (BackupFormat::DataFile::FragmentHeader*)tmp;
5644
5645 head->SectionType = htonl(BackupFormat::FRAGMENT_HEADER);
5646 head->SectionLength = htonl(headSz);
5647 head->TableId = htonl(tableId);
5648 head->FragmentNo = htonl(fragNo);
5649 head->ChecksumType = htonl(0);
5650
5651 opNoDone = opNoConf = opLen = 0;
5652 newRecord(tmp + headSz);
5653 scanStart = tmp;
5654 scanStop = (tmp + headSz);
5655
5656 noOfRecords = 0;
5657 noOfBytes = 0;
5658 return true;
5659 }//if
5660 return false;
5661 }
5662
5663 bool
fragComplete(Uint32 tableId,Uint32 fragNo,bool fill_record)5664 Backup::OperationRecord::fragComplete(Uint32 tableId, Uint32 fragNo, bool fill_record)
5665 {
5666 Uint32 * tmp;
5667 const Uint32 footSz = sizeof(BackupFormat::DataFile::FragmentFooter) >> 2;
5668 Uint32 sz = footSz + 1;
5669
5670 if (fill_record)
5671 {
5672 Uint32 * new_tmp;
5673 if (!dataBuffer.getWritePtr(&tmp, sz))
5674 return false;
5675 new_tmp = tmp + sz;
5676
5677 if ((UintPtr)new_tmp & (sizeof(Page32)-1))
5678 {
5679 /* padding is needed to get full write */
5680 new_tmp += 2 /* to fit empty header minimum 2 words*/;
5681 new_tmp = (Uint32 *)(((UintPtr)new_tmp + sizeof(Page32)-1) &
5682 ~(UintPtr)(sizeof(Page32)-1));
5683 /* new write sz */
5684 sz = Uint32(new_tmp - tmp);
5685 }
5686 }
5687
5688 if(dataBuffer.getWritePtr(&tmp, sz)) {
5689 jam();
5690 * tmp = 0; // Finish record stream
5691 tmp++;
5692 BackupFormat::DataFile::FragmentFooter * foot =
5693 (BackupFormat::DataFile::FragmentFooter*)tmp;
5694 foot->SectionType = htonl(BackupFormat::FRAGMENT_FOOTER);
5695 foot->SectionLength = htonl(footSz);
5696 foot->TableId = htonl(tableId);
5697 foot->FragmentNo = htonl(fragNo);
5698 foot->NoOfRecords = htonl(Uint32(noOfRecords)); // TODO
5699 foot->Checksum = htonl(0);
5700
5701 if (sz != footSz + 1)
5702 {
5703 tmp += footSz;
5704 memset(tmp, 0, (sz - footSz - 1) * 4);
5705 *tmp = htonl(BackupFormat::EMPTY_ENTRY);
5706 tmp++;
5707 *tmp = htonl(sz - footSz - 1);
5708 }
5709
5710 dataBuffer.updateWritePtr(sz);
5711 return true;
5712 }//if
5713 return false;
5714 }
5715
5716 bool
newScan()5717 Backup::OperationRecord::newScan()
5718 {
5719 Uint32 * tmp;
5720 ndbrequire(ZRESERVED_SCAN_BATCH_SIZE * maxRecordSize < dataBuffer.getMaxWrite());
5721 if(dataBuffer.getWritePtr(&tmp, ZRESERVED_SCAN_BATCH_SIZE * maxRecordSize))
5722 {
5723 jam();
5724 opNoDone = opNoConf = opLen = 0;
5725 newRecord(tmp);
5726 scanStart = tmp;
5727 scanStop = tmp;
5728 return true;
5729 }//if
5730 return false;
5731 }
5732
5733 bool
closeScan()5734 Backup::OperationRecord::closeScan()
5735 {
5736 opNoDone = opNoConf = opLen = 0;
5737 return true;
5738 }
5739
5740 bool
scanConf(Uint32 noOfOps,Uint32 total_len)5741 Backup::OperationRecord::scanConf(Uint32 noOfOps, Uint32 total_len)
5742 {
5743 const Uint32 done = Uint32(opNoDone-opNoConf);
5744
5745 ndbrequire(noOfOps == done);
5746 ndbrequire(opLen == total_len);
5747 opNoConf = opNoDone;
5748
5749 const Uint32 len = Uint32(scanStop - scanStart);
5750 ndbrequire(len < dataBuffer.getMaxWrite());
5751 dataBuffer.updateWritePtr(len);
5752 noOfBytes += (len << 2);
5753 m_bytes_total += (len << 2);
5754 m_records_total += noOfOps;
5755 return true;
5756 }
5757
5758 void
execSCAN_FRAGREF(Signal * signal)5759 Backup::execSCAN_FRAGREF(Signal* signal)
5760 {
5761 jamEntry();
5762
5763 ScanFragRef * ref = (ScanFragRef*)signal->getDataPtr();
5764
5765 const Uint32 filePtrI = ref->senderData;
5766 BackupFilePtr filePtr;
5767 c_backupFilePool.getPtr(filePtr, filePtrI);
5768
5769 Uint32 errCode = ref->errorCode;
5770 if (filePtr.p->errorCode == 0)
5771 {
5772 // check for transient errors
5773 switch(errCode){
5774 case ScanFragRef::ZSCAN_BOOK_ACC_OP_ERROR:
5775 case ScanFragRef::NO_TC_CONNECT_ERROR:
5776 case ScanFragRef::ZTOO_MANY_ACTIVE_SCAN_ERROR:
5777 jam();
5778 break;
5779 case ScanFragRef::TABLE_NOT_DEFINED_ERROR:
5780 case ScanFragRef::DROP_TABLE_IN_PROGRESS_ERROR:
5781 jam();
5782 /**
5783 * The table was dropped either at start of LCP scan or in the
5784 * middle of it. We will complete in the same manner as if we
5785 * got a SCAN_FRAGCONF with close flag set. The idea is that
5786 * the content of the LCP file in this case is not going to
5787 * be used anyways, so we just ensure that we complete things
5788 * in an ordered manner and then the higher layers will ensure
5789 * that the files are dropped and taken care of.
5790 *
5791 * This handling will ensure that drop table can complete
5792 * much faster.
5793 */
5794 fragmentCompleted(signal, filePtr);
5795 return;
5796 default:
5797 jam();
5798 filePtr.p->errorCode = errCode;
5799 }
5800 }
5801
5802 if (filePtr.p->errorCode == 0)
5803 {
5804 jam();
5805 filePtr.p->m_retry_count++;
5806 if (filePtr.p->m_retry_count == 10)
5807 {
5808 jam();
5809 filePtr.p->errorCode = errCode;
5810 }
5811 }
5812
5813 if (filePtr.p->errorCode != 0)
5814 {
5815 jam();
5816 filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_SCAN_THREAD;
5817 backupFragmentRef(signal, filePtr);
5818 }
5819 else
5820 {
5821 jam();
5822
5823 // retry
5824
5825 BackupRecordPtr ptr;
5826 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
5827 TablePtr tabPtr;
5828 ndbrequire(findTable(ptr, tabPtr, filePtr.p->tableId));
5829 FragmentPtr fragPtr;
5830 tabPtr.p->fragments.getPtr(fragPtr, filePtr.p->fragmentNo);
5831 sendScanFragReq(signal, ptr, filePtr, tabPtr, fragPtr,
5832 WaitScanTempErrorRetryMillis);
5833 }
5834 }
5835
5836 void
execSCAN_FRAGCONF(Signal * signal)5837 Backup::execSCAN_FRAGCONF(Signal* signal)
5838 {
5839 jamEntry();
5840
5841 CRASH_INSERTION((10017));
5842
5843 ScanFragConf * conf = (ScanFragConf*)signal->getDataPtr();
5844
5845 const Uint32 filePtrI = conf->senderData;
5846 BackupFilePtr filePtr;
5847 c_backupFilePool.getPtr(filePtr, filePtrI);
5848
5849 OperationRecord & op = filePtr.p->operation;
5850
5851 op.scanConf(conf->completedOps, conf->total_len);
5852 const Uint32 completed = conf->fragmentCompleted;
5853 if(completed != 2) {
5854 jam();
5855
5856 BackupRecordPtr ptr;
5857 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
5858 checkScan(signal, ptr, filePtr);
5859 return;
5860 }//if
5861
5862 fragmentCompleted(signal, filePtr);
5863 }
5864
5865 void
fragmentCompleted(Signal * signal,BackupFilePtr filePtr)5866 Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr)
5867 {
5868 jam();
5869
5870 if(filePtr.p->errorCode != 0)
5871 {
5872 jam();
5873 filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_SCAN_THREAD;
5874 backupFragmentRef(signal, filePtr); // Scan completed
5875 return;
5876 }//if
5877
5878 BackupRecordPtr ptr;
5879 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
5880
5881 OperationRecord & op = filePtr.p->operation;
5882 if(!op.fragComplete(filePtr.p->tableId, filePtr.p->fragmentNo,
5883 c_defaults.m_o_direct))
5884 {
5885 jam();
5886 signal->theData[0] = BackupContinueB::BUFFER_FULL_FRAG_COMPLETE;
5887 signal->theData[1] = filePtr.i;
5888 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal,
5889 WaitDiskBufferCapacityMillis, 2);
5890 return;
5891 }//if
5892
5893 filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_SCAN_THREAD;
5894
5895 if (ptr.p->is_lcp())
5896 {
5897 /* Maintain LCP totals */
5898 ptr.p->noOfRecords+= op.noOfRecords;
5899 ptr.p->noOfBytes+= op.noOfBytes;
5900
5901 ptr.p->slaveState.setState(STOPPING);
5902 filePtr.p->operation.dataBuffer.eof();
5903 }
5904 else
5905 {
5906 BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtrSend();
5907 conf->backupId = ptr.p->backupId;
5908 conf->backupPtr = ptr.i;
5909 conf->tableId = filePtr.p->tableId;
5910 conf->fragmentNo = filePtr.p->fragmentNo;
5911 conf->noOfRecordsLow = (Uint32)(op.noOfRecords & 0xFFFFFFFF);
5912 conf->noOfRecordsHigh = (Uint32)(op.noOfRecords >> 32);
5913 conf->noOfBytesLow = (Uint32)(op.noOfBytes & 0xFFFFFFFF);
5914 conf->noOfBytesHigh = (Uint32)(op.noOfBytes >> 32);
5915 sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal,
5916 BackupFragmentConf::SignalLength, JBA);
5917
5918 ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_CONF;
5919 ptr.p->slaveState.setState(STARTED);
5920 }
5921 return;
5922 }
5923
5924 void
backupFragmentRef(Signal * signal,BackupFilePtr filePtr)5925 Backup::backupFragmentRef(Signal * signal, BackupFilePtr filePtr)
5926 {
5927 BackupRecordPtr ptr;
5928 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
5929
5930 ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REF;
5931
5932 CRASH_INSERTION((10044));
5933 CRASH_INSERTION((10045));
5934
5935 BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtrSend();
5936 ref->backupId = ptr.p->backupId;
5937 ref->backupPtr = ptr.i;
5938 ref->nodeId = getOwnNodeId();
5939 ref->errorCode = filePtr.p->errorCode;
5940 sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_REF, signal,
5941 BackupFragmentRef::SignalLength, JBB);
5942 }
5943
5944 void
checkScan(Signal * signal,BackupRecordPtr ptr,BackupFilePtr filePtr)5945 Backup::checkScan(Signal* signal,
5946 BackupRecordPtr ptr,
5947 BackupFilePtr filePtr)
5948 {
5949 OperationRecord & op = filePtr.p->operation;
5950 BlockReference lqhRef = 0;
5951 {
5952 if (ptr.p->is_lcp()) {
5953 lqhRef = calcInstanceBlockRef(DBLQH);
5954 } else {
5955 TablePtr tabPtr;
5956 ndbrequire(findTable(ptr, tabPtr, filePtr.p->tableId));
5957 FragmentPtr fragPtr;
5958 tabPtr.p->fragments.getPtr(fragPtr, filePtr.p->fragmentNo);
5959 const Uint32 instanceKey = fragPtr.p->lqhInstanceKey;
5960 lqhRef = numberToRef(DBLQH, instanceKey, getOwnNodeId());
5961 }
5962 }
5963
5964 if(filePtr.p->errorCode != 0 || ptr.p->checkError())
5965 {
5966 jam();
5967
5968 /**
5969 * Close scan
5970 */
5971 op.closeScan();
5972 ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend();
5973 req->senderData = filePtr.i;
5974 req->requestInfo = 0;
5975 ScanFragNextReq::setCloseFlag(req->requestInfo, 1);
5976 req->transId1 = 0;
5977 req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
5978 sendSignal(lqhRef, GSN_SCAN_NEXTREQ, signal,
5979 ScanFragNextReq::SignalLength, JBB);
5980 return;
5981 }//if
5982
5983 if(op.newScan()) {
5984 jam();
5985
5986 ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend();
5987 req->senderData = filePtr.i;
5988 req->requestInfo = 0;
5989 req->transId1 = 0;
5990 req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
5991 req->batch_size_rows= ZRESERVED_SCAN_BATCH_SIZE;
5992 req->batch_size_bytes= 0;
5993
5994 if (ERROR_INSERTED(10039) &&
5995 filePtr.p->tableId >= 2 &&
5996 filePtr.p->operation.noOfRecords > 0)
5997 {
5998 ndbout_c("halting backup for table %d fragment: %d after %llu records",
5999 filePtr.p->tableId,
6000 filePtr.p->fragmentNo,
6001 filePtr.p->operation.noOfRecords);
6002 memmove(signal->theData+2, signal->theData,
6003 4*ScanFragNextReq::SignalLength);
6004 signal->theData[0] = BackupContinueB::ZDELAY_SCAN_NEXT;
6005 signal->theData[1] = filePtr.i;
6006 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal,
6007 300, 2+ScanFragNextReq::SignalLength);
6008 return;
6009 }
6010 if(ERROR_INSERTED(10032))
6011 sendSignalWithDelay(lqhRef, GSN_SCAN_NEXTREQ, signal,
6012 100, ScanFragNextReq::SignalLength);
6013 else if(ERROR_INSERTED(10033))
6014 {
6015 SET_ERROR_INSERT_VALUE(10032);
6016 sendSignalWithDelay(lqhRef, GSN_SCAN_NEXTREQ, signal,
6017 10000, ScanFragNextReq::SignalLength);
6018
6019 BackupRecordPtr ptr;
6020 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
6021 AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
6022 ord->backupId = ptr.p->backupId;
6023 ord->backupPtr = ptr.i;
6024 ord->requestType = AbortBackupOrd::FileOrScanError;
6025 ord->senderData= ptr.i;
6026 sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal,
6027 AbortBackupOrd::SignalLength, JBB);
6028 }
6029 #ifdef ERROR_INSERT
6030 else if (ERROR_INSERTED(10042) && filePtr.p->tableId ==c_error_insert_extra)
6031 {
6032 sendSignalWithDelay(lqhRef, GSN_SCAN_NEXTREQ, signal,
6033 10, ScanFragNextReq::SignalLength);
6034 }
6035 #endif
6036 else
6037 {
6038 /**
6039 * We send all interactions with bounded delay, this means that we will
6040 * wait for at most 128 signals before the signal is put into the A-level
6041 * job buffer. After this we will execute at A-level until we arrive
6042 * back with a SCAN_FRAGCONF. After SCAN_FRAGCONF we get back to here
6043 * again, so this means we will execute at least 16 rows before any
6044 * B-level signals are allowed again. So this means that the LCP will
6045 * scan at least 16 rows per 128 signals even at complete overload.
6046 *
6047 * We will even send yet one more row of 16 rows at A-priority level
6048 * per 100 B-level signals if we have difficulties in even meeting the
6049 * minimum desired checkpoint level.
6050 */
6051 JobBufferLevel prio_level = JBB;
6052 if (check_scan_if_raise_prio(signal, ptr))
6053 {
6054 OperationRecord & op = filePtr.p->operation;
6055 Uint32 *tmp = NULL;
6056 Uint32 sz = 0;
6057 bool eof = FALSE;
6058 bool file_buf_contains_min_write_size =
6059 op.dataBuffer.getReadPtr(&tmp, &sz, &eof);
6060
6061 ScanFragNextReq::setPrioAFlag(req->requestInfo, 1);
6062 if (file_buf_contains_min_write_size ||
6063 filePtr.p->m_num_scan_req_on_prioa >= 2 ||
6064 (filePtr.p->m_num_scan_req_on_prioa == 1 &&
6065 filePtr.p->m_sent_words_in_scan_batch > MAX_LCP_WORDS_PER_BATCH))
6066 {
6067 jam();
6068 /**
6069 * There are three reasons why we won't continue executing at
6070 * prio A level.
6071 *
6072 * 1) Last two executions was on prio A, this means that we have now
6073 * executed 2 sets of 16 rows at prio A level. So it is time to
6074 * give up the prio A level and allow back in some B-level jobs.
6075 *
6076 * 2) The last execution at prio A generated more than the max words
6077 * per A-level batch, so we get back to a bounded delay signal.
6078 *
6079 * 3) We already have a buffer ready to be sent to the file
6080 * system. No reason to execute at a very high priority simply
6081 * to fill buffers not waiting to be filled.
6082 */
6083 filePtr.p->m_sent_words_in_scan_batch = 0;
6084 filePtr.p->m_num_scan_req_on_prioa = 0;
6085 }
6086 else
6087 {
6088 jam();
6089 /* Continue at prio A level 16 more rows */
6090 filePtr.p->m_num_scan_req_on_prioa++;
6091 prio_level = JBA;
6092 }
6093 }
6094 else
6095 {
6096 jam();
6097 filePtr.p->m_sent_words_in_scan_batch = 0;
6098 filePtr.p->m_num_scan_req_on_prioa = 0;
6099 }
6100 if (lqhRef == calcInstanceBlockRef(DBLQH) && (prio_level == JBB))
6101 {
6102 sendSignalWithDelay(lqhRef, GSN_SCAN_NEXTREQ, signal,
6103 BOUNDED_DELAY, ScanFragNextReq::SignalLength);
6104 }
6105 else
6106 {
6107 /* Cannot send delayed signals to other threads. */
6108 sendSignal(lqhRef,
6109 GSN_SCAN_NEXTREQ,
6110 signal,
6111 ScanFragNextReq::SignalLength,
6112 prio_level);
6113 }
6114 /*
6115 check if it is time to report backup status
6116 */
6117 BackupRecordPtr ptr;
6118 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
6119 if (!ptr.p->is_lcp())
6120 {
6121 jam();
6122 checkReportStatus(signal, ptr);
6123 }
6124 }
6125 return;
6126 }//if
6127
6128 filePtr.p->m_sent_words_in_scan_batch = 0;
6129 filePtr.p->m_num_scan_req_on_prioa = 0;
6130
6131 signal->theData[0] = BackupContinueB::BUFFER_FULL_SCAN;
6132 signal->theData[1] = filePtr.i;
6133 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal,
6134 WaitDiskBufferCapacityMillis, 2);
6135 }
6136
6137 void
execFSAPPENDREF(Signal * signal)6138 Backup::execFSAPPENDREF(Signal* signal)
6139 {
6140 jamEntry();
6141
6142 FsRef * ref = (FsRef *)signal->getDataPtr();
6143
6144 const Uint32 filePtrI = ref->userPointer;
6145 const Uint32 errCode = ref->errorCode;
6146
6147 BackupFilePtr filePtr;
6148 c_backupFilePool.getPtr(filePtr, filePtrI);
6149
6150 filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_FILE_THREAD;
6151 filePtr.p->errorCode = errCode;
6152
6153 checkFile(signal, filePtr);
6154 }
6155
6156 void
execFSAPPENDCONF(Signal * signal)6157 Backup::execFSAPPENDCONF(Signal* signal)
6158 {
6159 jamEntry();
6160
6161 CRASH_INSERTION((10018));
6162
6163 //FsConf * conf = (FsConf*)signal->getDataPtr();
6164 const Uint32 filePtrI = signal->theData[0]; //conf->userPointer;
6165 const Uint32 bytes = signal->theData[1]; //conf->bytes;
6166
6167 BackupFilePtr filePtr;
6168 c_backupFilePool.getPtr(filePtr, filePtrI);
6169
6170 OperationRecord & op = filePtr.p->operation;
6171
6172 op.dataBuffer.updateReadPtr(bytes >> 2);
6173
6174 checkFile(signal, filePtr);
6175 }
6176
6177 /*
6178 This routine handles two problems with writing to disk during local
6179 checkpoints and backups. The first problem is that we need to limit
6180 the writing to ensure that we don't use too much CPU and disk resources
6181 for backups and checkpoints. The perfect solution to this is to use
6182 a dynamic algorithm that adapts to the environment. Until we have
6183 implemented this we can satisfy ourselves with an algorithm that
6184 uses a configurable limit.
6185
6186 The second problem is that in Linux we can get severe problems if we
6187 write very much to the disk without synching. In the worst case we
6188 can have Gigabytes of data in the Linux page cache before we reach
6189 the limit of how much we can write. If this happens the performance
6190 will drop significantly when we reach this limit since the Linux flush
6191 daemon will spend a few minutes on writing out the page cache to disk.
6192 To avoid this we ensure that a file never have more than a certain
6193 amount of data outstanding before synch. This variable is also
6194 configurable.
6195 */
6196 bool
ready_to_write(bool ready,Uint32 sz,bool eof,BackupFile * fileP)6197 Backup::ready_to_write(bool ready, Uint32 sz, bool eof, BackupFile *fileP)
6198 {
6199 #if 0
6200 ndbout << "ready_to_write: ready = " << ready << " eof = " << eof;
6201 ndbout << " sz = " << sz << endl;
6202 ndbout << "words this period = " << m_words_written_this_period;
6203 ndbout << endl << "overflow disk write = " << m_overflow_disk_write;
6204 ndbout << endl << "Current Millisecond is = ";
6205 ndbout << NdbTick_CurrentMillisecond() << endl;
6206 #endif
6207
6208 if (ERROR_INSERTED(10043) && eof)
6209 {
6210 /* Block indefinitely without closing the file */
6211 return false;
6212 }
6213
6214 if ((ready || eof) &&
6215 m_words_written_this_period <= m_curr_disk_write_speed)
6216 {
6217 /*
6218 We have a buffer ready to write or we have reached end of
6219 file and thus we must write the last before closing the
6220 file.
6221 We have already checked that we are allowed to write at this
6222 moment. We only worry about history of last 100 milliseconds.
6223 What happened before that is of no interest since a disk
6224 write that was issued more than 100 milliseconds should be
6225 completed by now.
6226 */
6227 int overflow;
6228 m_monitor_words_written+= sz;
6229 m_words_written_this_period += sz;
6230 overflow = m_words_written_this_period - m_curr_disk_write_speed;
6231 if (overflow > 0)
6232 m_overflow_disk_write = overflow;
6233 #if 0
6234 ndbout << "Will write with " << endl;
6235 ndbout << endl;
6236 #endif
6237 return true;
6238 }
6239 else
6240 {
6241 #if 0
6242 ndbout << "Will not write now" << endl << endl;
6243 #endif
6244 return false;
6245 }
6246 }
6247
6248 void
checkFile(Signal * signal,BackupFilePtr filePtr)6249 Backup::checkFile(Signal* signal, BackupFilePtr filePtr)
6250 {
6251
6252 #ifdef DEBUG_ABORT
6253 // ndbout_c("---- check file filePtr.i = %u", filePtr.i);
6254 #endif
6255
6256 OperationRecord & op = filePtr.p->operation;
6257 Uint32 *tmp = NULL;
6258 Uint32 sz = 0;
6259 bool eof = FALSE;
6260 bool ready = op.dataBuffer.getReadPtr(&tmp, &sz, &eof);
6261 #if 0
6262 ndbout << "Ptr to data = " << hex << tmp << endl;
6263 #endif
6264 BackupRecordPtr ptr;
6265 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
6266
6267 if (ERROR_INSERTED(10036))
6268 {
6269 jam();
6270 filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_FILE_THREAD;
6271 filePtr.p->errorCode = 2810;
6272 ptr.p->setErrorCode(2810);
6273
6274 if(ptr.p->m_gsn == GSN_STOP_BACKUP_REQ)
6275 {
6276 jam();
6277 closeFile(signal, ptr, filePtr);
6278 }
6279 return;
6280 }
6281
6282 if(filePtr.p->errorCode != 0)
6283 {
6284 jam();
6285 ptr.p->setErrorCode(filePtr.p->errorCode);
6286
6287 if(ptr.p->m_gsn == GSN_STOP_BACKUP_REQ)
6288 {
6289 jam();
6290 closeFile(signal, ptr, filePtr);
6291 }
6292
6293 if (ptr.p->is_lcp())
6294 {
6295 jam();
6296 /* Close file with error - will delete it */
6297 closeFile(signal, ptr, filePtr);
6298 }
6299
6300 return;
6301 }
6302
6303 if (!ready_to_write(ready, sz, eof, filePtr.p))
6304 {
6305 jam();
6306 signal->theData[0] = BackupContinueB::BUFFER_UNDERFLOW;
6307 signal->theData[1] = filePtr.i;
6308 sendSignalWithDelay(reference(), GSN_CONTINUEB, signal,
6309 WaitDiskBufferCapacityMillis, 2);
6310 return;
6311 }
6312 else if (sz > 0)
6313 {
6314 jam();
6315 #ifdef ERROR_INSERT
6316 /* Test APPENDREF handling */
6317 if (filePtr.p->fileType == BackupFormat::DATA_FILE)
6318 {
6319 if (ERROR_INSERTED(10045))
6320 {
6321 ndbout_c("BF_SCAN_THREAD = %u",
6322 (filePtr.p->m_flags & BackupFile::BF_SCAN_THREAD));
6323 }
6324
6325 if ((ERROR_INSERTED(10044) &&
6326 !(filePtr.p->m_flags & BackupFile::BF_SCAN_THREAD)) ||
6327 (ERROR_INSERTED(10045) &&
6328 (filePtr.p->m_flags & BackupFile::BF_SCAN_THREAD)))
6329 {
6330 jam();
6331 ndbout_c("REFing on append to data file for table %u, fragment %u, "
6332 "BF_SCAN_THREAD running : %u",
6333 filePtr.p->tableId,
6334 filePtr.p->fragmentNo,
6335 filePtr.p->m_flags & BackupFile::BF_SCAN_THREAD);
6336 FsRef* ref = (FsRef *)signal->getDataPtrSend();
6337 ref->userPointer = filePtr.i;
6338 ref->errorCode = FsRef::fsErrInvalidParameters;
6339 ref->osErrorCode = ~0;
6340 /* EXEC DIRECT to avoid change in BF_SCAN_THREAD state */
6341 EXECUTE_DIRECT(BACKUP, GSN_FSAPPENDREF, signal,
6342 3);
6343 return;
6344 }
6345 }
6346 #endif
6347
6348 ndbassert((Uint64(tmp - c_startOfPages) >> 32) == 0); // 4Gb buffers!
6349 FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
6350 req->filePointer = filePtr.p->filePointer;
6351 req->userPointer = filePtr.i;
6352 req->userReference = reference();
6353 req->varIndex = 0;
6354 req->offset = Uint32(tmp - c_startOfPages); // 4Gb buffers!
6355 req->size = sz;
6356 req->synch_flag = 0;
6357
6358 sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal,
6359 FsAppendReq::SignalLength, JBA);
6360 return;
6361 }
6362
6363 Uint32 flags = filePtr.p->m_flags;
6364 filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_FILE_THREAD;
6365
6366 ndbrequire(flags & BackupFile::BF_OPEN);
6367 ndbrequire(flags & BackupFile::BF_FILE_THREAD);
6368
6369 closeFile(signal, ptr, filePtr);
6370 }
6371
6372
/****************************************************************************
 *
 * Slave functionality: Perform logging
 *
 ****************************************************************************/
6378 void
execBACKUP_TRIG_REQ(Signal * signal)6379 Backup::execBACKUP_TRIG_REQ(Signal* signal)
6380 {
6381 /*
6382 TUP asks if this trigger is to be fired on this node.
6383 */
6384 TriggerPtr trigPtr;
6385 TablePtr tabPtr;
6386 FragmentPtr fragPtr;
6387 Uint32 trigger_id = signal->theData[0];
6388 Uint32 frag_id = signal->theData[1];
6389 Uint32 result;
6390
6391 jamEntry();
6392
6393 c_triggerPool.getPtr(trigPtr, trigger_id);
6394
6395 c_tablePool.getPtr(tabPtr, trigPtr.p->tab_ptr_i);
6396 tabPtr.p->fragments.getPtr(fragPtr, frag_id);
6397 if (fragPtr.p->node != getOwnNodeId()) {
6398
6399 jam();
6400 result = ZFALSE;
6401 } else {
6402 jam();
6403 result = ZTRUE;
6404 }//if
6405 signal->theData[0] = result;
6406 }
6407
6408 BackupFormat::LogFile::LogEntry *
get_log_buffer(Signal * signal,TriggerPtr trigPtr,Uint32 sz)6409 Backup::get_log_buffer(Signal* signal,
6410 TriggerPtr trigPtr, Uint32 sz)
6411 {
6412 Uint32 * dst;
6413 if(ERROR_INSERTED(10030))
6414 {
6415 jam();
6416 dst = 0;
6417 }
6418 else
6419 {
6420 jam();
6421 FsBuffer & buf = trigPtr.p->operation->dataBuffer;
6422 ndbrequire(sz <= buf.getMaxWrite());
6423 if (unlikely(!buf.getWritePtr(&dst, sz)))
6424 {
6425 jam();
6426 dst = 0;
6427 }
6428 }
6429
6430 if (unlikely(dst == 0))
6431 {
6432 Uint32 save[TrigAttrInfo::StaticLength];
6433 memcpy(save, signal->getDataPtr(), 4*TrigAttrInfo::StaticLength);
6434 BackupRecordPtr ptr;
6435 c_backupPool.getPtr(ptr, trigPtr.p->backupPtr);
6436 trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull;
6437 AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
6438 ord->backupId = ptr.p->backupId;
6439 ord->backupPtr = ptr.i;
6440 ord->requestType = AbortBackupOrd::LogBufferFull;
6441 ord->senderData= ptr.i;
6442 sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal,
6443 AbortBackupOrd::SignalLength, JBB);
6444
6445 memcpy(signal->getDataPtrSend(), save, 4*TrigAttrInfo::StaticLength);
6446 return 0;
6447 }//if
6448
6449 BackupFormat::LogFile::LogEntry * logEntry =
6450 (BackupFormat::LogFile::LogEntry *)dst;
6451 logEntry->Length = 0;
6452 logEntry->TableId = htonl(trigPtr.p->tableId);
6453
6454 if(trigPtr.p->event==0)
6455 logEntry->TriggerEvent= htonl(TriggerEvent::TE_INSERT);
6456 else if(trigPtr.p->event==1)
6457 logEntry->TriggerEvent= htonl(TriggerEvent::TE_UPDATE);
6458 else if(trigPtr.p->event==2)
6459 logEntry->TriggerEvent= htonl(TriggerEvent::TE_DELETE);
6460 else {
6461 ndbout << "Bad Event: " << trigPtr.p->event << endl;
6462 ndbrequire(false);
6463 }
6464
6465 return logEntry;
6466 }
6467
6468 void
execTRIG_ATTRINFO(Signal * signal)6469 Backup::execTRIG_ATTRINFO(Signal* signal) {
6470 jamEntry();
6471
6472 CRASH_INSERTION((10019));
6473
6474 TrigAttrInfo * trg = (TrigAttrInfo*)signal->getDataPtr();
6475
6476 TriggerPtr trigPtr;
6477 c_triggerPool.getPtr(trigPtr, trg->getTriggerId());
6478 ndbrequire(trigPtr.p->event != ILLEGAL_TRIGGER_ID); // Online...
6479
6480 if(trigPtr.p->errorCode != 0) {
6481 jam();
6482 return;
6483 }//if
6484
6485 BackupRecordPtr ptr;
6486 c_backupPool.getPtr(ptr, trigPtr.p->backupPtr);
6487
6488 if(ptr.p->flags & BackupReq::USE_UNDO_LOG) {
6489 if(trg->getAttrInfoType() == TrigAttrInfo::AFTER_VALUES) {
6490 jam();
6491 /**
6492 * Backup is doing UNDO logging and don't need after values
6493 */
6494 return;
6495 }//if
6496 }
6497 else {
6498 if(trg->getAttrInfoType() == TrigAttrInfo::BEFORE_VALUES) {
6499 jam();
6500 /**
6501 * Backup is doing REDO logging and don't need before values
6502 */
6503 return;
6504 }//if
6505 }
6506
6507 BackupFormat::LogFile::LogEntry * logEntry = trigPtr.p->logEntry;
6508 if(logEntry == 0)
6509 {
6510 jam();
6511 Uint32 sz = trigPtr.p->maxRecordSize;
6512 logEntry = trigPtr.p->logEntry = get_log_buffer(signal, trigPtr, sz);
6513 if (unlikely(logEntry == 0))
6514 {
6515 jam();
6516 return;
6517 }
6518 } else {
6519 ndbrequire(logEntry->TableId == htonl(trigPtr.p->tableId));
6520 // ndbrequire(logEntry->TriggerEvent == htonl(trigPtr.p->event));
6521 }//if
6522
6523 const Uint32 pos = logEntry->Length;
6524 const Uint32 dataLen = signal->length() - TrigAttrInfo::StaticLength;
6525 memcpy(&logEntry->Data[pos], trg->getData(), dataLen << 2);
6526
6527 logEntry->Length = pos + dataLen;
6528 }
6529
6530 void
execFIRE_TRIG_ORD(Signal * signal)6531 Backup::execFIRE_TRIG_ORD(Signal* signal)
6532 {
6533 jamEntry();
6534 FireTrigOrd* trg = (FireTrigOrd*)signal->getDataPtr();
6535
6536 const Uint32 gci = trg->getGCI();
6537 const Uint32 trI = trg->getTriggerId();
6538 const Uint32 fragId = trg->fragId;
6539
6540 TriggerPtr trigPtr;
6541 c_triggerPool.getPtr(trigPtr, trI);
6542
6543 ndbrequire(trigPtr.p->event != ILLEGAL_TRIGGER_ID);
6544
6545 BackupRecordPtr ptr;
6546 c_backupPool.getPtr(ptr, trigPtr.p->backupPtr);
6547
6548 if(trigPtr.p->errorCode != 0) {
6549 jam();
6550 SectionHandle handle(this, signal);
6551 releaseSections(handle);
6552 return;
6553 }//if
6554
6555 if (signal->getNoOfSections())
6556 {
6557 jam();
6558 SectionHandle handle(this, signal);
6559 TablePtr tabPtr;
6560 c_tablePool.getPtr(tabPtr, trigPtr.p->tab_ptr_i);
6561 FragmentPtr fragPtr;
6562 tabPtr.p->fragments.getPtr(fragPtr, fragId);
6563 if (fragPtr.p->node != getOwnNodeId())
6564 {
6565 jam();
6566 trigPtr.p->logEntry = 0;
6567 releaseSections(handle);
6568 return;
6569 }
6570
6571 SegmentedSectionPtr dataPtr[3];
6572 handle.getSection(dataPtr[0], 0);
6573 handle.getSection(dataPtr[1], 1);
6574 handle.getSection(dataPtr[2], 2);
6575 /**
6576 * dataPtr[0] : Primary key info
6577 * dataPtr[1] : Before values
6578 * dataPtr[2] : After values
6579 */
6580
6581 /* Backup is doing UNDO logging and need before values
6582 * Add 2 extra words to get_log_buffer for potential gci and logEntry length info stored at end.
6583 */
6584 if(ptr.p->flags & BackupReq::USE_UNDO_LOG) {
6585 trigPtr.p->logEntry = get_log_buffer(signal,
6586 trigPtr, dataPtr[0].sz + dataPtr[1].sz + 2);
6587 if (unlikely(trigPtr.p->logEntry == 0))
6588 {
6589 jam();
6590 releaseSections(handle);
6591 return;
6592 }
6593 copy(trigPtr.p->logEntry->Data, dataPtr[0]);
6594 copy(trigPtr.p->logEntry->Data+dataPtr[0].sz, dataPtr[1]);
6595 trigPtr.p->logEntry->Length = dataPtr[0].sz + dataPtr[1].sz;
6596 }
6597 // Backup is doing REDO logging and need after values
6598 else {
6599 trigPtr.p->logEntry = get_log_buffer(signal,
6600 trigPtr, dataPtr[0].sz + dataPtr[2].sz + 1);
6601 if (unlikely(trigPtr.p->logEntry == 0))
6602 {
6603 jam();
6604 releaseSections(handle);
6605 return;
6606 }
6607 copy(trigPtr.p->logEntry->Data, dataPtr[0]);
6608 copy(trigPtr.p->logEntry->Data+dataPtr[0].sz, dataPtr[2]);
6609 trigPtr.p->logEntry->Length = dataPtr[0].sz + dataPtr[2].sz;
6610 }
6611
6612 releaseSections(handle);
6613 }
6614
6615 ndbrequire(trigPtr.p->logEntry != 0);
6616 Uint32 len = trigPtr.p->logEntry->Length;
6617 trigPtr.p->logEntry->FragId = htonl(fragId);
6618
6619 if(gci != ptr.p->currGCP)
6620 {
6621 jam();
6622 trigPtr.p->logEntry->TriggerEvent|= htonl(0x10000);
6623 trigPtr.p->logEntry->Data[len] = htonl(gci);
6624 len++;
6625 ptr.p->currGCP = gci;
6626 }
6627
6628 Uint32 datalen = len;
6629 len += (sizeof(BackupFormat::LogFile::LogEntry) >> 2) - 2;
6630 trigPtr.p->logEntry->Length = htonl(len);
6631
6632 if(ptr.p->flags & BackupReq::USE_UNDO_LOG)
6633 {
6634 /* keep the length at both the end of logEntry and ->logEntry variable
6635 The total length of logEntry is len + 2
6636 */
6637 trigPtr.p->logEntry->Data[datalen] = htonl(len);
6638 }
6639
6640 Uint32 entryLength = len +1;
6641 if(ptr.p->flags & BackupReq::USE_UNDO_LOG)
6642 entryLength ++;
6643
6644 ndbrequire(entryLength <= trigPtr.p->operation->dataBuffer.getMaxWrite());
6645 trigPtr.p->operation->dataBuffer.updateWritePtr(entryLength);
6646 trigPtr.p->logEntry = 0;
6647
6648 {
6649 const Uint32 entryByteLength = entryLength << 2;
6650 trigPtr.p->operation->noOfBytes += entryByteLength;
6651 trigPtr.p->operation->m_bytes_total += entryByteLength;
6652 trigPtr.p->operation->noOfRecords += 1;
6653 trigPtr.p->operation->m_records_total += 1;
6654 }
6655 }
6656
6657 void
sendAbortBackupOrd(Signal * signal,BackupRecordPtr ptr,Uint32 requestType)6658 Backup::sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr,
6659 Uint32 requestType)
6660 {
6661 jam();
6662 AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
6663 ord->backupId = ptr.p->backupId;
6664 ord->backupPtr = ptr.i;
6665 ord->requestType = requestType;
6666 ord->senderData= ptr.i;
6667 NodePtr node;
6668 for(c_nodes.first(node); node.i != RNIL; c_nodes.next(node)) {
6669 jam();
6670 const Uint32 nodeId = node.p->nodeId;
6671 if(node.p->alive && ptr.p->nodes.get(nodeId)) {
6672 jam();
6673 BlockReference ref = numberToRef(BACKUP, instanceKey(ptr), nodeId);
6674 sendSignal(ref, GSN_ABORT_BACKUP_ORD, signal,
6675 AbortBackupOrd::SignalLength, JBB);
6676 }//if
6677 }//for
6678 }
6679
/*****************************************************************************
 *
 * Slave functionality: Stop backup
 *
 *****************************************************************************/
6685 void
execSTOP_BACKUP_REQ(Signal * signal)6686 Backup::execSTOP_BACKUP_REQ(Signal* signal)
6687 {
6688 jamEntry();
6689 StopBackupReq * req = (StopBackupReq*)signal->getDataPtr();
6690
6691 CRASH_INSERTION((10020));
6692
6693 const Uint32 ptrI = req->backupPtr;
6694 //const Uint32 backupId = req->backupId;
6695 const Uint32 startGCP = req->startGCP;
6696 const Uint32 stopGCP = req->stopGCP;
6697
6698 /**
6699 * At least one GCP must have passed
6700 */
6701 ndbrequire(stopGCP > startGCP);
6702
6703 /**
6704 * Get backup record
6705 */
6706 BackupRecordPtr ptr;
6707 c_backupPool.getPtr(ptr, ptrI);
6708
6709 ptr.p->slaveState.setState(STOPPING);
6710 ptr.p->m_gsn = GSN_STOP_BACKUP_REQ;
6711 ptr.p->startGCP= startGCP;
6712 ptr.p->stopGCP= stopGCP;
6713
6714 /**
6715 * Destroy the triggers in local DBTUP we created
6716 */
6717 sendDropTrig(signal, ptr);
6718 }
6719
6720 void
closeFiles(Signal * sig,BackupRecordPtr ptr)6721 Backup::closeFiles(Signal* sig, BackupRecordPtr ptr)
6722 {
6723 /**
6724 * Close all files
6725 */
6726 BackupFilePtr filePtr;
6727 int openCount = 0;
6728 for(ptr.p->files.first(filePtr); filePtr.i!=RNIL; ptr.p->files.next(filePtr))
6729 {
6730 if(! (filePtr.p->m_flags & BackupFile::BF_OPEN))
6731 {
6732 jam();
6733 continue;
6734 }
6735
6736 jam();
6737 openCount++;
6738
6739 if(filePtr.p->m_flags & BackupFile::BF_CLOSING)
6740 {
6741 jam();
6742 continue;
6743 }//if
6744
6745 filePtr.p->operation.dataBuffer.eof();
6746 if(filePtr.p->m_flags & BackupFile::BF_FILE_THREAD)
6747 {
6748 jam();
6749 #ifdef DEBUG_ABORT
6750 ndbout_c("Close files fileRunning == 1, filePtr.i=%u", filePtr.i);
6751 #endif
6752 }
6753 else
6754 {
6755 jam();
6756 closeFile(sig, ptr, filePtr);
6757 }
6758 }
6759
6760 if(openCount == 0){
6761 jam();
6762 closeFilesDone(sig, ptr);
6763 }//if
6764 }
6765
6766 void
closeFile(Signal * signal,BackupRecordPtr ptr,BackupFilePtr filePtr)6767 Backup::closeFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr)
6768 {
6769 ndbrequire(filePtr.p->m_flags & BackupFile::BF_OPEN);
6770 ndbrequire(! (filePtr.p->m_flags & BackupFile::BF_OPENING));
6771 ndbrequire(! (filePtr.p->m_flags & BackupFile::BF_CLOSING));
6772 filePtr.p->m_flags |= BackupFile::BF_CLOSING;
6773
6774 FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend();
6775 req->filePointer = filePtr.p->filePointer;
6776 req->userPointer = filePtr.i;
6777 req->userReference = reference();
6778 req->fileFlag = 0;
6779
6780 if (ptr.p->errorCode)
6781 {
6782 FsCloseReq::setRemoveFileFlag(req->fileFlag, 1);
6783 }
6784
6785 #ifdef DEBUG_ABORT
6786 ndbout_c("***** a FSCLOSEREQ filePtr.i = %u flags: %x",
6787 filePtr.i, filePtr.p->m_flags);
6788 #endif
6789 sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA);
6790
6791 }
6792
6793 void
execFSCLOSEREF(Signal * signal)6794 Backup::execFSCLOSEREF(Signal* signal)
6795 {
6796 jamEntry();
6797
6798 FsRef * ref = (FsRef*)signal->getDataPtr();
6799 const Uint32 filePtrI = ref->userPointer;
6800
6801 BackupFilePtr filePtr;
6802 c_backupFilePool.getPtr(filePtr, filePtrI);
6803
6804 BackupRecordPtr ptr;
6805 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
6806
6807 FsConf * conf = (FsConf*)signal->getDataPtr();
6808 conf->userPointer = filePtrI;
6809
6810 execFSCLOSECONF(signal);
6811 }
6812
6813 void
execFSCLOSECONF(Signal * signal)6814 Backup::execFSCLOSECONF(Signal* signal)
6815 {
6816 jamEntry();
6817
6818 FsConf * conf = (FsConf*)signal->getDataPtr();
6819 const Uint32 filePtrI = conf->userPointer;
6820
6821 BackupFilePtr filePtr;
6822 c_backupFilePool.getPtr(filePtr, filePtrI);
6823
6824 #ifdef DEBUG_ABORT
6825 ndbout_c("***** FSCLOSECONF filePtrI = %u", filePtrI);
6826 #endif
6827
6828 ndbrequire(filePtr.p->m_flags == (BackupFile::BF_OPEN |
6829 BackupFile::BF_CLOSING));
6830
6831
6832 filePtr.p->m_flags &= ~(Uint32)(BackupFile::BF_OPEN |BackupFile::BF_CLOSING);
6833 filePtr.p->operation.dataBuffer.reset();
6834
6835 BackupRecordPtr ptr;
6836 c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
6837 closeFiles(signal, ptr);
6838 }
6839
6840 void
closeFilesDone(Signal * signal,BackupRecordPtr ptr)6841 Backup::closeFilesDone(Signal* signal, BackupRecordPtr ptr)
6842 {
6843 jam();
6844
6845 if(ptr.p->is_lcp())
6846 {
6847 lcp_close_file_conf(signal, ptr);
6848 return;
6849 }
6850
6851 jam();
6852
6853 //error when do insert footer or close file
6854 if(ptr.p->checkError())
6855 {
6856 StopBackupRef * ref = (StopBackupRef*)signal->getDataPtr();
6857 ref->backupPtr = ptr.i;
6858 ref->backupId = ptr.p->backupId;
6859 ref->errorCode = ptr.p->errorCode;
6860 ref->nodeId = getOwnNodeId();
6861 sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_REF, signal,
6862 StopBackupConf::SignalLength, JBB);
6863
6864 ptr.p->m_gsn = GSN_STOP_BACKUP_REF;
6865 ptr.p->slaveState.setState(CLEANING);
6866 return;
6867 }
6868
6869 StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend();
6870 conf->backupId = ptr.p->backupId;
6871 conf->backupPtr = ptr.i;
6872
6873 BackupFilePtr filePtr;
6874 if(ptr.p->logFilePtr != RNIL)
6875 {
6876 ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
6877 conf->noOfLogBytes= Uint32(filePtr.p->operation.noOfBytes); // TODO
6878 conf->noOfLogRecords= Uint32(filePtr.p->operation.noOfRecords); // TODO
6879 }
6880 else
6881 {
6882 conf->noOfLogBytes= 0;
6883 conf->noOfLogRecords= 0;
6884 }
6885
6886 sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal,
6887 StopBackupConf::SignalLength, JBB);
6888
6889 ptr.p->m_gsn = GSN_STOP_BACKUP_CONF;
6890 ptr.p->slaveState.setState(CLEANING);
6891 }
6892
/*****************************************************************************
 *
 * Slave functionality: Abort backup
 *
 *****************************************************************************/
6903 void
execABORT_BACKUP_ORD(Signal * signal)6904 Backup::execABORT_BACKUP_ORD(Signal* signal)
6905 {
6906 jamEntry();
6907 AbortBackupOrd* ord = (AbortBackupOrd*)signal->getDataPtr();
6908
6909 const Uint32 backupId = ord->backupId;
6910 const AbortBackupOrd::RequestType requestType =
6911 (AbortBackupOrd::RequestType)ord->requestType;
6912 const Uint32 senderData = ord->senderData;
6913
6914 #ifdef DEBUG_ABORT
6915 ndbout_c("******** ABORT_BACKUP_ORD ********* nodeId = %u",
6916 refToNode(signal->getSendersBlockRef()));
6917 ndbout_c("backupId = %u, requestType = %u, senderData = %u, ",
6918 backupId, requestType, senderData);
6919 dumpUsedResources();
6920 #endif
6921
6922 BackupRecordPtr ptr;
6923 if(requestType == AbortBackupOrd::ClientAbort) {
6924 if (getOwnNodeId() != getMasterNodeId()) {
6925 jam();
6926 // forward to master
6927 #ifdef DEBUG_ABORT
6928 ndbout_c("---- Forward to master nodeId = %u", getMasterNodeId());
6929 #endif
6930 BlockReference ref = numberToRef(BACKUP, UserBackupInstanceKey,
6931 getMasterNodeId());
6932 sendSignal(ref, GSN_ABORT_BACKUP_ORD,
6933 signal, AbortBackupOrd::SignalLength, JBB);
6934 return;
6935 }
6936 jam();
6937 for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr)) {
6938 jam();
6939 if(ptr.p->backupId == backupId && ptr.p->clientData == senderData) {
6940 jam();
6941 break;
6942 }//if
6943 }//for
6944 if(ptr.i == RNIL) {
6945 jam();
6946 return;
6947 }//if
6948 } else {
6949 if (c_backupPool.findId(senderData)) {
6950 jam();
6951 c_backupPool.getPtr(ptr, senderData);
6952 } else {
6953 jam();
6954 #ifdef DEBUG_ABORT
6955 ndbout_c("Backup: abort request type=%u on id=%u,%u not found",
6956 requestType, backupId, senderData);
6957 #endif
6958 return;
6959 }
6960 }//if
6961
6962 ptr.p->m_gsn = GSN_ABORT_BACKUP_ORD;
6963 const bool isCoordinator = (ptr.p->masterRef == reference());
6964
6965 bool ok = false;
6966 switch(requestType){
6967
6968 /**
6969 * Requests sent to master
6970 */
6971 case AbortBackupOrd::ClientAbort:
6972 jam();
6973 // fall through
6974 case AbortBackupOrd::LogBufferFull:
6975 jam();
6976 // fall through
6977 case AbortBackupOrd::FileOrScanError:
6978 jam();
6979 ndbrequire(isCoordinator);
6980 ptr.p->setErrorCode(requestType);
6981 if(ptr.p->masterData.gsn == GSN_BACKUP_FRAGMENT_REQ)
6982 {
6983 /**
6984 * Only scans are actively aborted
6985 */
6986 abort_scan(signal, ptr);
6987 }
6988 return;
6989
6990 /**
6991 * Requests sent to slave
6992 */
6993 case AbortBackupOrd::AbortScan:
6994 jam();
6995 ptr.p->setErrorCode(requestType);
6996 return;
6997
6998 case AbortBackupOrd::BackupComplete:
6999 jam();
7000 cleanup(signal, ptr);
7001 return;
7002 case AbortBackupOrd::BackupFailure:
7003 case AbortBackupOrd::BackupFailureDueToNodeFail:
7004 case AbortBackupOrd::OkToClean:
7005 case AbortBackupOrd::IncompatibleVersions:
7006 #ifndef VM_TRACE
7007 default:
7008 #endif
7009 ptr.p->setErrorCode(requestType);
7010 ptr.p->masterData.errorCode = requestType;
7011 ok= true;
7012 }
7013 ndbrequire(ok);
7014
7015 ptr.p->masterRef = reference();
7016 ptr.p->nodes.clear();
7017 ptr.p->nodes.set(getOwnNodeId());
7018
7019
7020 ptr.p->stopGCP= ptr.p->startGCP + 1;
7021 sendStopBackup(signal, ptr);
7022 }
7023
7024
7025 void
dumpUsedResources()7026 Backup::dumpUsedResources()
7027 {
7028 jam();
7029 BackupRecordPtr ptr;
7030
7031 for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr)) {
7032 ndbout_c("Backup id=%u, slaveState.getState = %u, errorCode=%u",
7033 ptr.p->backupId,
7034 ptr.p->slaveState.getState(),
7035 ptr.p->errorCode);
7036
7037 TablePtr tabPtr;
7038 for(ptr.p->tables.first(tabPtr);
7039 tabPtr.i != RNIL;
7040 ptr.p->tables.next(tabPtr)) {
7041 jam();
7042 for(Uint32 j = 0; j<3; j++) {
7043 jam();
7044 TriggerPtr trigPtr;
7045 if(tabPtr.p->triggerAllocated[j]) {
7046 jam();
7047 c_triggerPool.getPtr(trigPtr, tabPtr.p->triggerIds[j]);
7048 ndbout_c("Allocated[%u] Triggerid = %u, event = %u",
7049 j,
7050 tabPtr.p->triggerIds[j],
7051 trigPtr.p->event);
7052 }//if
7053 }//for
7054 }//for
7055
7056 BackupFilePtr filePtr;
7057 for(ptr.p->files.first(filePtr);
7058 filePtr.i != RNIL;
7059 ptr.p->files.next(filePtr)) {
7060 jam();
7061 ndbout_c("filePtr.i = %u, flags: H'%x ",
7062 filePtr.i, filePtr.p->m_flags);
7063 }//for
7064 }
7065 }
7066
7067 void
cleanup(Signal * signal,BackupRecordPtr ptr)7068 Backup::cleanup(Signal* signal, BackupRecordPtr ptr)
7069 {
7070 TablePtr tabPtr;
7071 ptr.p->tables.first(tabPtr);
7072 cleanupNextTable(signal, ptr, tabPtr);
7073 }
7074
7075 void
cleanupNextTable(Signal * signal,BackupRecordPtr ptr,TablePtr tabPtr)7076 Backup::cleanupNextTable(Signal *signal, BackupRecordPtr ptr, TablePtr tabPtr)
7077 {
7078 if (tabPtr.i != RNIL)
7079 {
7080 jam();
7081 tabPtr.p->fragments.release();
7082 for(Uint32 j = 0; j<3; j++) {
7083 jam();
7084 TriggerPtr trigPtr;
7085 if(tabPtr.p->triggerAllocated[j]) {
7086 jam();
7087 c_triggerPool.getPtr(trigPtr, tabPtr.p->triggerIds[j]);
7088 trigPtr.p->event = ILLEGAL_TRIGGER_ID;
7089 tabPtr.p->triggerAllocated[j] = false;
7090 }//if
7091 tabPtr.p->triggerIds[j] = ILLEGAL_TRIGGER_ID;
7092 }//for
7093 {
7094 BackupLockTab *req = (BackupLockTab *)signal->getDataPtrSend();
7095 req->m_senderRef = reference();
7096 req->m_tableId = tabPtr.p->tableId;
7097 req->m_lock_unlock = BackupLockTab::UNLOCK_TABLE;
7098 req->m_backup_state = BackupLockTab::CLEANUP;
7099 req->m_backupRecordPtr_I = ptr.i;
7100 req->m_tablePtr_I = tabPtr.i;
7101 sendSignal(DBDICT_REF, GSN_BACKUP_LOCK_TAB_REQ, signal,
7102 BackupLockTab::SignalLength, JBB);
7103 return;
7104 }
7105 }
7106
7107 BackupFilePtr filePtr;
7108 for(ptr.p->files.first(filePtr);filePtr.i != RNIL;ptr.p->files.next(filePtr))
7109 {
7110 jam();
7111 ndbrequire(filePtr.p->m_flags == 0);
7112 filePtr.p->pages.release();
7113 }//for
7114
7115 while (ptr.p->files.releaseFirst());
7116 while (ptr.p->tables.releaseFirst());
7117 while (ptr.p->triggers.releaseFirst());
7118 ptr.p->backupId = ~0;
7119
7120 /*
7121 report of backup status uses these variables to keep track
7122 if files are used
7123 */
7124 ptr.p->ctlFilePtr = ptr.p->logFilePtr = ptr.p->dataFilePtr = RNIL;
7125
7126 if(ptr.p->checkError())
7127 removeBackup(signal, ptr);
7128 else
7129 {
7130 /*
7131 report of backup status uses these variables to keep track
7132 if backup ia running and current state
7133 */
7134 ptr.p->m_gsn = 0;
7135 ptr.p->masterData.gsn = 0;
7136 c_backups.release(ptr);
7137 }
7138 }
7139
7140
7141 void
removeBackup(Signal * signal,BackupRecordPtr ptr)7142 Backup::removeBackup(Signal* signal, BackupRecordPtr ptr)
7143 {
7144 jam();
7145
7146 FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend();
7147 req->userReference = reference();
7148 req->userPointer = ptr.i;
7149 req->directory = 1;
7150 req->ownDirectory = 1;
7151 FsOpenReq::setVersion(req->fileNumber, 2);
7152 FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL);
7153 FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId);
7154 FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId());
7155 sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal,
7156 FsRemoveReq::SignalLength, JBA);
7157 }
7158
7159 void
execFSREMOVEREF(Signal * signal)7160 Backup::execFSREMOVEREF(Signal* signal)
7161 {
7162 jamEntry();
7163 FsRef * ref = (FsRef*)signal->getDataPtr();
7164 const Uint32 ptrI = ref->userPointer;
7165
7166 FsConf * conf = (FsConf*)signal->getDataPtr();
7167 conf->userPointer = ptrI;
7168 execFSREMOVECONF(signal);
7169 }
7170
7171 void
execFSREMOVECONF(Signal * signal)7172 Backup::execFSREMOVECONF(Signal* signal){
7173 jamEntry();
7174
7175 FsConf * conf = (FsConf*)signal->getDataPtr();
7176 const Uint32 ptrI = conf->userPointer;
7177
7178 /**
7179 * Get backup record
7180 */
7181 BackupRecordPtr ptr;
7182 c_backupPool.getPtr(ptr, ptrI);
7183 /*
7184 report of backup status uses these variables to keep track
7185 if backup ia running and current state
7186 */
7187 ptr.p->m_gsn = 0;
7188 ptr.p->masterData.gsn = 0;
7189 c_backups.release(ptr);
7190 }
7191
7192 /**
7193 * LCP execution starts.
7194 *
7195 * Description of local LCP handling when checkpointing one fragment locally in
7196 * this data node. DBLQH, BACKUP are executing always in the same thread. DICT
7197 * and NDBFS mostly execute in different threads.
7198 *
7199
7200 DBLQH BACKUP DICT NDBFS
7201 | |
7202 | LCP_PREPARE_REQ |
7203 |---------------------------->|
7204 | | FSOPENREQ
7205 | |----------------------------------->|
7206 | | FSOPENCONF |
7207 | |<-----------------------------------|
7208 | | GET_TABINFOREQ |
7209 | |----------------->|
7210 | | GET_TABINFO_CONF |
7211 | |<-----------------|
7212 | LCP_PREPARE_CONF |
7213 |<----------------------------|
7214 | BACKUP_FRAGMENT_REQ |-------> CONTINUEB(START_FILE_THREAD)|
7215 |---------------------------->|
7216 | SCAN_FRAGREQ |
7217 |<----------------------------|
7218 |
7219 | Potential CONTINUEB(ZTUP_SCAN) while scanning for tuples to record in LCP
7220 |
7221 | TRANSID_AI |
7222 |---------------------------->|
7223 |.... More TRANSID_AI | (Up to 16 TRANSID_AI, 1 per record)
7224 | SCAN_FRAGCONF(close_flag) |
7225 |---------------------------->|
7226 | SCAN_NEXTREQ |
7227 |<----------------------------|
7228 |
7229 | Potential CONTINUEB(ZTUP_SCAN) while scanning for tuples to record in LCP
7230 |
7231 | TRANSID_AI |
7232 |---------------------------->|
7233 |.... More TRANSID_AI | (Up to 16 TRANSID_AI, 1 per record)
7234 | SCAN_FRAGCONF(close_flag) |
7235 |---------------------------->|
7236
  After each SCAN_FRAGCONF we check if there is enough space in the Backup
  buffer used for the LCP. We will not check it until here, so the buffer
  must be big enough to be able to store the maximum size of 16 records
  in the buffer. Given that maximum record size is about 16kB, this means
  that we must have at least 256 kB of buffer space for LCPs. The default
  is 2MB, so one should not set it lower than this unless trying to achieve
  a really memory optimised setup.
7244
7245 If there is currently no space in the LCP buffer, then the buffer is either
7246 waiting to be written to disk, or it is being written to disk. In this case
7247 we will send a CONTINUEB(BUFFER_FULL_SCAN) delayed signal until the buffer
7248 is available again.
7249
7250 When the buffer is available again we send a new SCAN_NEXTREQ for the next
7251 set of rows to be recorded in LCP.
7252
7253 CONTINUEB(START_FILE_THREAD) will either send a FSAPPENDREQ to the opened
7254 file or it will send a delayed CONTINUEB(BUFFER_UNDERFLOW).
7255
7256 When FSAPPENDCONF arrives it will make the same check again and either
7257 send one more file write through FSAPPENDREQ or another
7258 CONTINUEB(BUFFER_UNDERFLOW). It will continue like this until the
7259 SCAN_FRAGCONF has been sent with close_flag set to true AND all the buffers
7260 have been written to disk.
7261
  After the LCP file write has been completed the close of the fragment LCP
  is started.
7264
7265 An important consideration when executing LCPs is that they conflict with
7266 the normal processing of user commands such as key lookups, scans and so
7267 forth. If we execute on normal JBB-level everything we are going to get
7268 problems in that we could have job buffers of thousands of signals. This
7269 means that we will run the LCP extremely slow which will be a significant
7270 problem.
7271
7272 The other approach is to use JBA-level. This will obviously give the
7273 LCP too high priority, we will run LCPs until we have filled up the
7274 buffer or even until we have filled up our quota for the 100ms timeslot
7275 where we check for those things. This could end up in producing 10
7276 MByte of LCP data before allowing user level transactions again. This
7277 is also obviously not a good idea.
7278
  So most of the startup and shutdown logic for LCPs, both for the entire
  LCP and messages per fragment LCP is ok to raise to JBA level. They are
  short and concise messages and won't bother the user transactions at any
  noticeable level. We will avoid fixing GET_TABINFO for that since it
  is only one signal per fragment LCP and also the code path is also used
  by many other activities which are not suitable to run at JBA-level.
7285
7286 So the major problem to handle is the actual scanning towards LQH. Here
7287 we need to use a mechanism that keeps the rate at appropriate levels.
7288 We will use a mix of keeping track of how many jobs were executed since
7289 last time we executed together with sending JBA-level signals to speed
7290 up LCP processing for a short time and using signals sent with delay 0
7291 to avoid being delayed for more than 128 signals (the maximum amount
7292 of signals executed before we check timed signals).
7293
7294 The first step to handle this is to ensure that we can send SCAN_FRAGREQ
7295 on priority A and that this also causes the resulting signals that these
7296 messages generate also to be sent on priority A level. Then each time
7297 we can continue the scan immediately after receiving SCAN_FRAGCONF we
7298 need to make a decision at which level to send the signal. We can
7299 either send it as delayed signal with 0 delay or we could send them
7300 at priority A level to get another chunk of data for the LCP at a high
7301 priority.
7302
7303 We send the information about Priority A-level as a flag in the
7304 SCAN_FRAGREQ signal. This will ensure that all resulting signals
7305 will be sent on Priority A except the CONTINUEB(ZTUP_SCAN) which
7306 will get special treatment where it increases the length of the
7307 loop counter and sends the signal with delay 0. We cannot send
7308 this signal on priority level A since there is no bound on how
7309 long it will execute.
7310
7311 DBLQH BACKUP DICT NDBFS
7312 | | FSCLOSEREQ
7313 | |------------------------------------>|
7314 | | FSCLOSECONF
7315 | |<------------------------------------|
7316 | BACKUP_FRAGMENT_CONF |
7317 |<----------------------------|
7318 |
7319 | DIH
7320 | LCP_FRAG_REP |
7321 |--------------------->|
7322
7323 Finally after completing all fragments we have a number of signals sent to
7324 complete the LCP processing.
7325
7326 | END_LCPREQ |
7327 |---------------------------->|
7328 | END_LCPCONF |
7329 |<----------------------------|
7330 |
7331 LQH Proxy PGMAN(extra) LGMAN TSMAN
7332 | LCP_COMPLETE_REP |
7333 |---------------------------->|
7334
  Here the LQH Proxy block will wait for all DBLQH instances to complete.
  After all have completed, the following signals will be sent.
7337 LQH Proxy PGMAN(extra) LGMAN TSMAN
7338
7339 | END_LCPREQ |
7340 |----------->|
7341 | END_LCPCONF|
7342 |<-----------|
7343 | END_LCPREQ |
7344 |---------------------------------->|
7345 | END_LCPREQ |
7346 |-------------------------->|
7347 | END_LCPCONF |
7348 |<--------------------------|
7349 |
7350 | LCP_COMPLETE_REP(DBLQH) sent to DIH
7351
7352 The TSMAN block doesn't respond to END_LCPREQ. The LGMAN is required to be
7353 involved at the end of the LCP to ensure that the UNDO log have been fully
7354 synched to disk before we report the LCP as complete. We won't use any
7355 fragment LCPs until the full LCP is complete for disk data due to this.
7356
7357 As preparation for this DBLQH sent DEFINE_BACKUP_REQ to setup a backup
7358 record in restart phase 4. It must get the response DEFINE_BACKUP_CONF for
7359 the restart to successfully complete. This signal allocates memory for the
7360 LCP buffers.
7361 */
7362 void
execLCP_PREPARE_REQ(Signal * signal)7363 Backup::execLCP_PREPARE_REQ(Signal* signal)
7364 {
7365 jamEntry();
7366 LcpPrepareReq req = *(LcpPrepareReq*)signal->getDataPtr();
7367
7368 BackupRecordPtr ptr;
7369 c_backupPool.getPtr(ptr, req.backupPtr);
7370
7371 ptr.p->m_gsn = GSN_LCP_PREPARE_REQ;
7372
7373 TablePtr tabPtr;
7374 FragmentPtr fragPtr;
7375 if (!ptr.p->tables.isEmpty())
7376 {
7377 jam();
7378 ndbrequire(ptr.p->errorCode);
7379 ptr.p->tables.first(tabPtr);
7380 if (tabPtr.p->tableId == req.tableId)
7381 {
7382 jam();
7383 ndbrequire(!tabPtr.p->fragments.empty());
7384 tabPtr.p->fragments.getPtr(fragPtr, 0);
7385 fragPtr.p->fragmentId = req.fragmentId;
7386 defineBackupRef(signal, ptr, ptr.p->errorCode);
7387 return;
7388 }
7389 else
7390 {
7391 jam();
7392 tabPtr.p->fragments.release();
7393 while (ptr.p->tables.releaseFirst());
7394 ptr.p->errorCode = 0;
7395 // fall-through
7396 }
7397 }
7398
7399 if (!ptr.p->tables.seizeLast(tabPtr) || !tabPtr.p->fragments.seize(1))
7400 {
7401 if(!tabPtr.isNull())
7402 while (ptr.p->tables.releaseFirst());
7403 ndbrequire(false); // TODO
7404 }
7405 tabPtr.p->tableId = req.tableId;
7406 tabPtr.p->fragments.getPtr(fragPtr, 0);
7407 tabPtr.p->tableType = DictTabInfo::UserTable;
7408 fragPtr.p->fragmentId = req.fragmentId;
7409 fragPtr.p->lcp_no = req.lcpNo;
7410 fragPtr.p->scanned = 0;
7411 fragPtr.p->scanning = 0;
7412 fragPtr.p->tableId = req.tableId;
7413
7414 if (req.backupId != ptr.p->backupId)
7415 {
7416 jam();
7417 /* New LCP, reset per-LCP counters */
7418 ptr.p->noOfBytes = 0;
7419 ptr.p->noOfRecords = 0;
7420 }
7421 ptr.p->backupId= req.backupId;
7422 lcp_open_file(signal, ptr);
7423 }
7424
7425 void
lcp_close_file_conf(Signal * signal,BackupRecordPtr ptr)7426 Backup::lcp_close_file_conf(Signal* signal, BackupRecordPtr ptr)
7427 {
7428 jam();
7429
7430 TablePtr tabPtr;
7431 ndbrequire(ptr.p->tables.first(tabPtr));
7432 Uint32 tableId = tabPtr.p->tableId;
7433
7434 BackupFilePtr filePtr;
7435 c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
7436 ndbrequire(filePtr.p->m_flags == 0);
7437
7438 if (ptr.p->m_gsn == GSN_LCP_PREPARE_REQ)
7439 {
7440 jam();
7441 defineBackupRef(signal, ptr, ptr.p->errorCode);
7442 return;
7443 }
7444
7445 FragmentPtr fragPtr;
7446 tabPtr.p->fragments.getPtr(fragPtr, 0);
7447 Uint32 fragmentId = fragPtr.p->fragmentId;
7448
7449 tabPtr.p->fragments.release();
7450 while (ptr.p->tables.releaseFirst());
7451
7452 if (ptr.p->errorCode != 0)
7453 {
7454 jam();
7455 ndbout_c("Fatal : LCP Frag scan failed with error %u",
7456 ptr.p->errorCode);
7457 ndbrequire(filePtr.p->errorCode == ptr.p->errorCode);
7458
7459 if ((filePtr.p->m_flags & BackupFile::BF_SCAN_THREAD) == 0)
7460 {
7461 jam();
7462 /* No active scan thread to 'find' the file error.
7463 * Scan is closed, so let's send backupFragmentRef
7464 * back to LQH now...
7465 */
7466 backupFragmentRef(signal, filePtr);
7467 }
7468 return;
7469 }
7470
7471 OperationRecord & op = filePtr.p->operation;
7472 ptr.p->errorCode = 0;
7473
7474 BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtrSend();
7475 conf->backupId = ptr.p->backupId;
7476 conf->backupPtr = ptr.i;
7477 conf->tableId = tableId;
7478 conf->fragmentNo = fragmentId;
7479 conf->noOfRecordsLow = (op.noOfRecords & 0xFFFFFFFF);
7480 conf->noOfRecordsHigh = (op.noOfRecords >> 32);
7481 conf->noOfBytesLow = (op.noOfBytes & 0xFFFFFFFF);
7482 conf->noOfBytesHigh = (op.noOfBytes >> 32);
7483 sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal,
7484 BackupFragmentConf::SignalLength, JBA);
7485 }
7486
7487 void
lcp_open_file(Signal * signal,BackupRecordPtr ptr)7488 Backup::lcp_open_file(Signal* signal, BackupRecordPtr ptr)
7489 {
7490 FsOpenReq * req = (FsOpenReq *)signal->getDataPtrSend();
7491 req->userReference = reference();
7492 req->fileFlags =
7493 FsOpenReq::OM_WRITEONLY |
7494 FsOpenReq::OM_TRUNCATE |
7495 FsOpenReq::OM_CREATE |
7496 FsOpenReq::OM_APPEND |
7497 FsOpenReq::OM_AUTOSYNC;
7498
7499 if (c_defaults.m_compressed_lcp)
7500 req->fileFlags |= FsOpenReq::OM_GZ;
7501
7502 if (c_defaults.m_o_direct)
7503 req->fileFlags |= FsOpenReq::OM_DIRECT;
7504 FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF);
7505 req->auto_sync_size = c_defaults.m_disk_synch_size;
7506
7507 TablePtr tabPtr;
7508 FragmentPtr fragPtr;
7509
7510 ndbrequire(ptr.p->tables.first(tabPtr));
7511 tabPtr.p->fragments.getPtr(fragPtr, 0);
7512
7513 /**
7514 * Lcp file
7515 */
7516 BackupFilePtr filePtr;
7517 c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
7518 ndbrequire(filePtr.p->m_flags == 0);
7519 filePtr.p->m_flags |= BackupFile::BF_OPENING;
7520 filePtr.p->tableId = RNIL; // Will force init
7521 req->userPointer = filePtr.i;
7522 FsOpenReq::setVersion(req->fileNumber, 5);
7523 FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_DATA);
7524 FsOpenReq::v5_setLcpNo(req->fileNumber, fragPtr.p->lcp_no);
7525 FsOpenReq::v5_setTableId(req->fileNumber, tabPtr.p->tableId);
7526 FsOpenReq::v5_setFragmentId(req->fileNumber, fragPtr.p->fragmentId);
7527 sendSignal(NDBFS_REF, GSN_FSOPENREQ, signal, FsOpenReq::SignalLength, JBA);
7528 }
7529
7530 void
lcp_open_file_done(Signal * signal,BackupRecordPtr ptr)7531 Backup::lcp_open_file_done(Signal* signal, BackupRecordPtr ptr)
7532 {
7533 TablePtr tabPtr;
7534 FragmentPtr fragPtr;
7535
7536 ndbrequire(ptr.p->tables.first(tabPtr));
7537 tabPtr.p->fragments.getPtr(fragPtr, 0);
7538
7539 BackupFilePtr filePtr;
7540 c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
7541 ndbrequire(filePtr.p->m_flags ==
7542 (BackupFile::BF_OPEN | BackupFile::BF_LCP_META));
7543 filePtr.p->m_flags &= ~(Uint32)BackupFile::BF_LCP_META;
7544
7545 ptr.p->slaveState.setState(STARTED);
7546
7547 LcpPrepareConf* conf= (LcpPrepareConf*)signal->getDataPtrSend();
7548 conf->senderData = ptr.p->clientData;
7549 conf->senderRef = reference();
7550 conf->tableId = tabPtr.p->tableId;
7551 conf->fragmentId = fragPtr.p->fragmentId;
7552 sendSignal(ptr.p->masterRef, GSN_LCP_PREPARE_CONF,
7553 signal, LcpPrepareConf::SignalLength, JBA);
7554
7555 /**
7556 * Start file thread
7557 */
7558 filePtr.p->m_flags |= BackupFile::BF_FILE_THREAD;
7559
7560 signal->theData[0] = BackupContinueB::START_FILE_THREAD;
7561 signal->theData[1] = filePtr.i;
7562 signal->theData[2] = __LINE__;
7563 sendSignal(reference(), GSN_CONTINUEB, signal, 3, JBB);
7564 }
7565
7566 void
execEND_LCPREQ(Signal * signal)7567 Backup::execEND_LCPREQ(Signal* signal)
7568 {
7569 EndLcpReq* req= (EndLcpReq*)signal->getDataPtr();
7570
7571 BackupRecordPtr ptr;
7572 c_backupPool.getPtr(ptr, req->backupPtr);
7573 /**
7574 * At least one table should exist here, it isn't possible
7575 * to drop the system table, so this should always be part
7576 * of an LCP. Thus we can be safe that the backupId should
7577 * be set (it is set when a LCP is started on a fragment.
7578 */
7579 ndbrequire(ptr.p->backupId == req->backupId);
7580
7581 BackupFilePtr filePtr;
7582 ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
7583 ndbrequire(filePtr.p->m_flags == 0);
7584
7585 if (!ptr.p->tables.isEmpty())
7586 {
7587 jam();
7588 ndbrequire(ptr.p->errorCode);
7589 TablePtr tabPtr;
7590 ptr.p->tables.first(tabPtr);
7591 tabPtr.p->fragments.release();
7592 while (ptr.p->tables.releaseFirst());
7593 ptr.p->errorCode = 0;
7594 }
7595
7596 ptr.p->errorCode = 0;
7597 ptr.p->slaveState.setState(CLEANING);
7598 ptr.p->slaveState.setState(INITIAL);
7599 ptr.p->slaveState.setState(DEFINING);
7600 ptr.p->slaveState.setState(DEFINED);
7601
7602 EndLcpConf* conf= (EndLcpConf*)signal->getDataPtr();
7603 conf->senderData = ptr.p->clientData;
7604 conf->senderRef = reference();
7605 sendSignal(ptr.p->masterRef, GSN_END_LCPCONF,
7606 signal, EndLcpConf::SignalLength, JBA);
7607 }
7608
/**
 * Split a 64-bit value into its two 32-bit halves.
 *
 * @param src  value to split
 * @param hi   receives bits 32..63 of src
 * @param lo   receives bits 0..31 of src
 */
inline
static
void setWords(const Uint64 src, Uint32& hi, Uint32& lo)
{
  const Uint64 upperHalf = src >> 32;
  const Uint64 lowerHalf = src & 0xffffffff;

  hi = static_cast<Uint32>(upperHalf);
  lo = static_cast<Uint32>(lowerHalf);
}
7616
7617 void
execLCP_STATUS_REQ(Signal * signal)7618 Backup::execLCP_STATUS_REQ(Signal* signal)
7619 {
7620 jamEntry();
7621 const LcpStatusReq* req = (const LcpStatusReq*) signal->getDataPtr();
7622
7623 const Uint32 senderRef = req->senderRef;
7624 const Uint32 senderData = req->senderData;
7625 Uint32 failCode = LcpStatusRef::NoLCPRecord;
7626
7627 /* Find LCP backup, if there is one */
7628 BackupRecordPtr ptr;
7629 bool found_lcp = false;
7630 for (c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr))
7631 {
7632 jam();
7633 if (ptr.p->is_lcp())
7634 {
7635 jam();
7636 ndbrequire(found_lcp == false); /* Just one LCP */
7637 found_lcp = true;
7638
7639 LcpStatusConf::LcpState state = LcpStatusConf::LCP_IDLE;
7640 switch (ptr.p->slaveState.getState())
7641 {
7642 case STARTED:
7643 jam();
7644 state = LcpStatusConf::LCP_PREPARED;
7645 break;
7646 case SCANNING:
7647 jam();
7648 state = LcpStatusConf::LCP_SCANNING;
7649 break;
7650 case STOPPING:
7651 jam();
7652 state = LcpStatusConf::LCP_SCANNED;
7653 break;
7654 case DEFINED:
7655 jam();
7656 state = LcpStatusConf::LCP_IDLE;
7657 break;
7658 default:
7659 jam();
7660 ndbout_c("Unusual LCP state in LCP_STATUS_REQ() : %u",
7661 ptr.p->slaveState.getState());
7662 state = LcpStatusConf::LCP_IDLE;
7663 };
7664
7665 /* Not all values are set here */
7666 const Uint32 UnsetConst = ~0;
7667
7668 LcpStatusConf* conf = (LcpStatusConf*) signal->getDataPtr();
7669 conf->senderRef = reference();
7670 conf->senderData = senderData;
7671 conf->lcpState = state;
7672 conf->tableId = UnsetConst;
7673 conf->fragId = UnsetConst;
7674 conf->completionStateHi = UnsetConst;
7675 conf->completionStateLo = UnsetConst;
7676 setWords(ptr.p->noOfRecords,
7677 conf->lcpDoneRowsHi,
7678 conf->lcpDoneRowsLo);
7679 setWords(ptr.p->noOfBytes,
7680 conf->lcpDoneBytesHi,
7681 conf->lcpDoneBytesLo);
7682 conf->lcpScannedPages = 0;
7683
7684 if (state == LcpStatusConf::LCP_SCANNING ||
7685 state == LcpStatusConf::LCP_SCANNED)
7686 {
7687 jam();
7688 /* Actually scanning/closing a fragment, let's grab the details */
7689 TablePtr tabPtr;
7690 FragmentPtr fragPtr;
7691 BackupFilePtr filePtr;
7692
7693 if (ptr.p->dataFilePtr == RNIL)
7694 {
7695 jam();
7696 failCode = LcpStatusRef::NoFileRecord;
7697 break;
7698 }
7699 c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
7700 ndbrequire(filePtr.p->backupPtr == ptr.i);
7701
7702 ptr.p->tables.first(tabPtr);
7703 if (tabPtr.i != RNIL)
7704 {
7705 jam();
7706 tabPtr.p->fragments.getPtr(fragPtr, 0);
7707 ndbrequire(fragPtr.p->tableId == tabPtr.p->tableId);
7708 conf->tableId = tabPtr.p->tableId;
7709 conf->fragId = fragPtr.p->fragmentId;
7710 }
7711
7712 if (state == LcpStatusConf::LCP_SCANNING)
7713 {
7714 jam();
7715 setWords(filePtr.p->operation.noOfRecords,
7716 conf->completionStateHi,
7717 conf->completionStateLo);
7718 conf->lcpScannedPages = filePtr.p->operation.lcpScannedPages;
7719 }
7720 else if (state == LcpStatusConf::LCP_SCANNED)
7721 {
7722 jam();
7723 /* May take some time to drain the FS buffer, depending on
7724 * size of buff, achieved rate.
7725 * We provide the buffer fill level so that requestors
7726 * can observe whether there's progress in this phase.
7727 */
7728 Uint64 flushBacklog =
7729 filePtr.p->operation.dataBuffer.getUsableSize() -
7730 filePtr.p->operation.dataBuffer.getFreeSize();
7731
7732 setWords(flushBacklog,
7733 conf->completionStateHi,
7734 conf->completionStateLo);
7735 }
7736 }
7737
7738 failCode = 0;
7739 }
7740 }
7741
7742 if (failCode == 0)
7743 {
7744 jam();
7745 sendSignal(senderRef, GSN_LCP_STATUS_CONF,
7746 signal, LcpStatusConf::SignalLength, JBB);
7747 return;
7748 }
7749
7750 jam();
7751 LcpStatusRef* ref = (LcpStatusRef*) signal->getDataPtr();
7752
7753 ref->senderRef = reference();
7754 ref->senderData = senderData;
7755 ref->error = failCode;
7756
7757 sendSignal(senderRef, GSN_LCP_STATUS_REF,
7758 signal, LcpStatusRef::SignalLength, JBB);
7759 return;
7760 }
7761