1 /*
2 Copyright (c) 2003, 2021, Oracle and/or its affiliates.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <NDBT.hpp>
26 #include <NDBT_Test.hpp>
27 #include <HugoTransactions.hpp>
28 #include <UtilTransactions.hpp>
29 #include <NdbRestarter.hpp>
30 #include <Vector.hpp>
31 #include <signaldata/DumpStateOrd.hpp>
32 #include <NdbBackup.hpp>
33 #include <Bitmask.hpp>
34 #include <DbUtil.hpp>
35
runLoadTable(NDBT_Context * ctx,NDBT_Step * step)36 int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
37
38 int records = ctx->getNumRecords();
39 HugoTransactions hugoTrans(*ctx->getTab());
40 if (hugoTrans.loadTable(GETNDB(step), records) != 0){
41 return NDBT_FAILED;
42 }
43 return NDBT_OK;
44 }
45
runFillTable(NDBT_Context * ctx,NDBT_Step * step)46 int runFillTable(NDBT_Context* ctx, NDBT_Step* step){
47 Ndb* pNdb = GETNDB(step);
48 NdbDictionary::Table tab(*ctx->getTab());
49
50 /* fill table until its full */
51 HugoTransactions hugoTrans(tab);
52 if(hugoTrans.fillTable(pNdb) != 0){
53 return NDBT_FAILED;
54 }
55
56 /* store the number of rows */
57 int cnt;
58 UtilTransactions utilTrans(tab);
59 if(utilTrans.selectCount(pNdb, 0, &cnt) != 0){
60 g_err << "Select count failed." << endl;
61 return NDBT_FAILED;
62 }
63 ctx->setProperty("recordCount", cnt);
64 return NDBT_OK;
65 }
66
runVerifyFilledTables(NDBT_Context * ctx,NDBT_Step * step)67 int runVerifyFilledTables(NDBT_Context* ctx, NDBT_Step* step)
68 {
69 /* verify the number of rows is intact */
70 Ndb* pNdb = GETNDB(step);
71 int countOld= ctx->getProperty("recordCount");
72 if (countOld == 0){
73 /* table was not filled using fillTable */
74 g_err << "Table initial row count not available" << endl;
75 return NDBT_FAILED;
76 }
77 /* ctx's tab gets invalidated in alter table reorganize partition
78 Hence reloading table again to verify */
79 const char *tableName= ctx->getTableName(0);
80 const NdbDictionary::Table* pTab =
81 NDBT_Table::discoverTableFromDb(pNdb, tableName);
82 if (pTab == NULL){
83 g_err << tableName << " was lost during the test." << endl;
84 return NDBT_FAILED;
85 }
86
87 /* compare new record count with old */
88 int cnt;
89 UtilTransactions utilTrans(*pTab);
90 if(utilTrans.selectCount(pNdb, 0, &cnt) != 0){
91 g_err << "Select count failed." << endl;
92 return NDBT_FAILED;
93 }
94 if(cnt != countOld){
95 g_err << "Number of rows in result table different from expected" << endl;
96 return NDBT_FAILED;
97 }
98 return NDBT_OK;
99 }
100
101 int
clearOldBackups(NDBT_Context * ctx,NDBT_Step * step)102 clearOldBackups(NDBT_Context* ctx, NDBT_Step* step)
103 {
104 NdbBackup backup;
105 backup.clearOldBackups();
106 return NDBT_OK;
107 }
108
/**
 * CHECK(b): if condition b is false, log the step name and source line to
 * g_err, set the local variable 'result' to NDBT_FAILED and 'continue'.
 *
 * Requirements at the point of use:
 *  - a variable named 'step' (with getName()) and an assignable variable
 *    named 'result' must be in scope;
 *  - it must appear directly inside a loop, because it expands to a bare
 *    'continue'.  The loops in this file test 'result != NDBT_FAILED' in
 *    their condition, so a failed CHECK ends the loop.
 *
 * The expansion is intentionally not wrapped in do { } while (0): that
 * would make 'continue' apply to the wrapper instead of the caller's loop.
 */
#define CHECK(b) if (!(b)) { \
  g_err << "ERR: "<< step->getName() \
         << " failed on line " << __LINE__ << endl; \
  result = NDBT_FAILED; \
  continue; }
114
runSystemRestart1(NDBT_Context * ctx,NDBT_Step * step)115 int runSystemRestart1(NDBT_Context* ctx, NDBT_Step* step){
116 Ndb* pNdb = GETNDB(step);
117 int result = NDBT_OK;
118 int timeout = 300;
119 Uint32 loops = ctx->getNumLoops();
120 int records = ctx->getNumRecords();
121 int count;
122 NdbRestarter restarter;
123 Uint32 i = 1;
124
125 UtilTransactions utilTrans(*ctx->getTab());
126 HugoTransactions hugoTrans(*ctx->getTab());
127 while(i<=loops && result != NDBT_FAILED){
128
129 ndbout << "Loop " << i << "/"<< loops <<" started" << endl;
130 /*
131 1. Load data
132 2. Restart cluster and verify records
133 3. Update records
134 4. Restart cluster and verify records
135 5. Delete half of the records
136 6. Restart cluster and verify records
137 7. Delete all records
138 8. Restart cluster and verify records
139 9. Insert, update, delete records
140 10. Restart cluster and verify records
141 11. Insert, update, delete records
142 12. Restart cluster with error insert 5020 and verify records
143 */
144 ndbout << "Loading records..." << endl;
145 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
146
147 ndbout << "Restarting cluster" << endl;
148 CHECK(restarter.restartAll() == 0);
149 CHECK(restarter.waitClusterStarted(timeout) == 0);
150 CHECK(pNdb->waitUntilReady(timeout) == 0);
151
152 ndbout << "Verifying records..." << endl;
153 CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
154 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
155 CHECK(count == records);
156
157 ndbout << "Updating records..." << endl;
158 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
159
160 ndbout << "Restarting cluster..." << endl;
161 CHECK(restarter.restartAll() == 0);
162 CHECK(restarter.waitClusterStarted(timeout) == 0);
163 CHECK(pNdb->waitUntilReady(timeout) == 0);
164
165 ndbout << "Verifying records..." << endl;
166 CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
167 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
168 CHECK(count == records);
169
170 ndbout << "Deleting 50% of records..." << endl;
171 CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
172
173 ndbout << "Restarting cluster..." << endl;
174 CHECK(restarter.restartAll() == 0);
175 CHECK(restarter.waitClusterStarted(timeout) == 0);
176 CHECK(pNdb->waitUntilReady(timeout) == 0);
177
178 ndbout << "Verifying records..." << endl;
179 CHECK(hugoTrans.scanReadRecords(pNdb, records/2, 0, 64) == 0);
180 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
181 CHECK(count == (records/2));
182
183 ndbout << "Deleting all records..." << endl;
184 CHECK(utilTrans.clearTable(pNdb, records/2) == 0);
185
186 ndbout << "Restarting cluster..." << endl;
187 CHECK(restarter.restartAll() == 0);
188 CHECK(restarter.waitClusterStarted(timeout) == 0);
189 CHECK(pNdb->waitUntilReady(timeout) == 0);
190
191 ndbout << "Verifying records..." << endl;
192 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
193 CHECK(count == 0);
194
195 ndbout << "Doing it all..." << endl;
196 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
197 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
198 CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
199 CHECK(hugoTrans.scanUpdateRecords(pNdb, records/2) == 0);
200 CHECK(utilTrans.clearTable(pNdb, records) == 0);
201 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
202 CHECK(utilTrans.clearTable(pNdb, records) == 0);
203 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
204 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
205 CHECK(utilTrans.clearTable(pNdb, records) == 0);
206
207 ndbout << "Restarting cluster..." << endl;
208 CHECK(restarter.restartAll() == 0);
209 CHECK(restarter.waitClusterStarted(timeout) == 0);
210 CHECK(pNdb->waitUntilReady(timeout) == 0);
211
212 ndbout << "Verifying records..." << endl;
213 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
214 CHECK(count == 0);
215
216 ndbout << "Doing it all..." << endl;
217 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
218 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
219 CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
220 CHECK(hugoTrans.scanUpdateRecords(pNdb, records/2) == 0);
221 CHECK(utilTrans.clearTable(pNdb, records) == 0);
222 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
223 CHECK(utilTrans.clearTable(pNdb, records) == 0);
224
225 ndbout << "Restarting cluster with error insert 5020..." << endl;
226 CHECK(restarter.restartAll(false, true) == 0);
227 CHECK(restarter.waitClusterNoStart(timeout) == 0);
228 CHECK(restarter.insertErrorInAllNodes(5020) == 0);
229 CHECK(restarter.startAll() == 0);
230 CHECK(restarter.waitClusterStarted(timeout) == 0);
231 CHECK(pNdb->waitUntilReady(timeout) == 0);
232
233 i++;
234 }
235
236 ndbout << "runSystemRestart1 finished" << endl;
237
238 return result;
239 }
240
runSystemRestart2(NDBT_Context * ctx,NDBT_Step * step)241 int runSystemRestart2(NDBT_Context* ctx, NDBT_Step* step){
242 Ndb* pNdb = GETNDB(step);
243 int result = NDBT_OK;
244 /// int timeout = 300;
245 int timeout = 120;
246 Uint32 loops = ctx->getNumLoops();
247 int records = ctx->getNumRecords();
248 int count;
249 NdbRestarter restarter;
250 Uint32 i = 1;
251
252 UtilTransactions utilTrans(*ctx->getTab());
253 HugoTransactions hugoTrans(*ctx->getTab());
254 while(i<=loops && result != NDBT_FAILED && !ctx->isTestStopped()){
255
256 ndbout << "Loop " << i << "/"<< loops <<" started" << endl;
257 /* Use error 7070 to set time between LCP to it's min value
258 1. Load data
259 2. Restart cluster and verify records
260 3. Update records
261 4. Restart cluster and verify records
262 5. Delete half of the records
263 6. Restart cluster and verify records
264 7. Delete all records
265 8. Restart cluster and verify records
266 9. Insert, update, delete records
267 10. Restart cluster and verify records
268 */
269 int val = DumpStateOrd::DihMinTimeBetweenLCP;
270 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
271
272 ndbout << "Loading records..." << endl;
273 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
274
275 ndbout << "Restarting cluster" << endl;
276 CHECK(restarter.restartAll() == 0);
277 CHECK(restarter.waitClusterStarted(timeout) == 0);
278 {
279 int val = DumpStateOrd::DihMinTimeBetweenLCP;
280 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
281 }
282 CHECK(pNdb->waitUntilReady(timeout) == 0);
283
284 ndbout << "Verifying records..." << endl;
285 CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
286 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
287 CHECK(count == records);
288
289 ndbout << "Updating records..." << endl;
290 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
291
292 ndbout << "Restarting cluster..." << endl;
293 CHECK(restarter.restartAll() == 0);
294 CHECK(restarter.waitClusterStarted(timeout) == 0);
295 {
296 int val = DumpStateOrd::DihMinTimeBetweenLCP;
297 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
298 }
299 CHECK(pNdb->waitUntilReady(timeout) == 0);
300
301 ndbout << "Verifying records..." << endl;
302 CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
303 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
304 CHECK(count == records);
305
306 ndbout << "Deleting 50% of records..." << endl;
307 CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
308
309 ndbout << "Restarting cluster..." << endl;
310 CHECK(restarter.restartAll() == 0);
311 CHECK(restarter.waitClusterStarted(timeout) == 0);
312 {
313 int val = DumpStateOrd::DihMinTimeBetweenLCP;
314 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
315 }
316 CHECK(pNdb->waitUntilReady(timeout) == 0);
317
318 ndbout << "Verifying records..." << endl;
319 CHECK(hugoTrans.scanReadRecords(pNdb, records/2, 0, 64) == 0);
320 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
321 CHECK(count == (records/2));
322
323 ndbout << "Deleting all records..." << endl;
324 CHECK(utilTrans.clearTable(pNdb, records/2) == 0);
325
326 ndbout << "Restarting cluster..." << endl;
327 CHECK(restarter.restartAll() == 0);
328 CHECK(restarter.waitClusterStarted(timeout) == 0);
329 {
330 int val = DumpStateOrd::DihMinTimeBetweenLCP;
331 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
332 }
333 CHECK(pNdb->waitUntilReady(timeout) == 0);
334
335 ndbout << "Verifying records..." << endl;
336 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
337 CHECK(count == 0);
338
339 ndbout << "Doing it all..." << endl;
340 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
341 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
342 CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
343 CHECK(hugoTrans.scanUpdateRecords(pNdb, records/2) == 0);
344 CHECK(utilTrans.clearTable(pNdb, records) == 0);
345 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
346 CHECK(utilTrans.clearTable(pNdb, records) == 0);
347 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
348 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
349 CHECK(utilTrans.clearTable(pNdb, records) == 0);
350
351 ndbout << "Restarting cluster..." << endl;
352 CHECK(restarter.restartAll() == 0);
353 CHECK(restarter.waitClusterStarted(timeout) == 0);
354 {
355 int val = DumpStateOrd::DihMinTimeBetweenLCP;
356 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
357 }
358 CHECK(pNdb->waitUntilReady(timeout) == 0);
359
360 ndbout << "Verifying records..." << endl;
361 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
362 CHECK(count == 0);
363
364 i++;
365 }
366
367 ndbout << "runSystemRestart2 finished" << endl;
368
369 return result;
370 }
371
runSystemRestartTestUndoLog(NDBT_Context * ctx,NDBT_Step * step)372 int runSystemRestartTestUndoLog(NDBT_Context* ctx, NDBT_Step* step){
373 Ndb* pNdb = GETNDB(step);
374 int result = NDBT_OK;
375 int timeout = 300;
376 Uint32 loops = ctx->getNumLoops();
377 int records = ctx->getNumRecords();
378 int count;
379 NdbRestarter restarter;
380 Uint32 i = 1;
381
382 int dump7080[2];
383 dump7080[0] = 7080;
384 dump7080[1] = ctx->getTab()->getTableId();
385
386 UtilTransactions utilTrans(*ctx->getTab());
387 HugoTransactions hugoTrans(*ctx->getTab());
388 while(i<=loops && result != NDBT_FAILED){
389
390 ndbout << "Loop " << i << "/"<< loops <<" started" << endl;
391 /*
392 1. Start LCP, turn on undologging but delay write of datapages.
393 2. Insert, update, delete records
394 3. Complete writing of data pages and finish LCP.
395 4. Restart cluster and verify records
396 */
397 // Use dump state 7080 to delay writing of datapages
398 // for the current table
399 ndbout << "Dump state: "<<dump7080[0]<<", "<<dump7080[1]<<endl;
400 CHECK(restarter.dumpStateAllNodes(dump7080, 2) == 0);
401 NdbSleep_SecSleep(10);
402
403 ndbout << "Doing it all..." << endl;
404 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
405 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
406 CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
407 CHECK(hugoTrans.scanUpdateRecords(pNdb, records/2) == 0);
408 CHECK(utilTrans.clearTable(pNdb, records) == 0);
409 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
410 CHECK(utilTrans.clearTable(pNdb, records) == 0);
411
412 // Reset error and let LCP continue
413 CHECK(restarter.insertErrorInAllNodes(0) == 0);
414 NdbSleep_SecSleep(60);
415
416 ndbout << "Restarting cluster..." << endl;
417 CHECK(restarter.restartAll() == 0);
418 CHECK(restarter.waitClusterStarted(timeout) == 0);
419 CHECK(pNdb->waitUntilReady(timeout) == 0);
420
421 ndbout << "Verifying records..." << endl;
422 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
423 CHECK(count == 0);
424
425 // Use dump state 7080 to delay writing of datapages
426 // for the current table
427 ndbout << "Dump state: "<<dump7080[0]<<", "<<dump7080[1]<<endl;
428 CHECK(restarter.dumpStateAllNodes(dump7080, 2) == 0);
429 NdbSleep_SecSleep(10);
430
431 ndbout << "Doing it all, delete 50%..." << endl;
432 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
433 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
434 CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
435
436 // Reset error and let LCP continue
437 CHECK(restarter.insertErrorInAllNodes(0) == 0);
438 NdbSleep_SecSleep(20);
439
440 ndbout << "Restarting cluster..." << endl;
441 CHECK(restarter.restartAll() == 0);
442 CHECK(restarter.waitClusterStarted(timeout) == 0);
443 CHECK(pNdb->waitUntilReady(timeout) == 0);
444
445 ndbout << "Verifying records..." << endl;
446 CHECK(hugoTrans.scanReadRecords(pNdb, records/2, 0, 64) == 0);
447 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
448 CHECK(count == (records/2));
449 CHECK(utilTrans.clearTable(pNdb, records) == 0);
450
451 i++;
452 }
453
454 ndbout << "runSystemRestartTestUndoLog finished" << endl;
455
456 return result;
457 }
458
runSystemRestartTestFullDb(NDBT_Context * ctx,NDBT_Step * step)459 int runSystemRestartTestFullDb(NDBT_Context* ctx, NDBT_Step* step){
460 Ndb* pNdb = GETNDB(step);
461 int result = NDBT_OK;
462 int timeout = 300;
463 Uint32 loops = ctx->getNumLoops();
464 int count1, count2;
465 NdbRestarter restarter;
466 Uint32 i = 1;
467
468 UtilTransactions utilTrans(*ctx->getTab());
469 HugoTransactions hugoTrans(*ctx->getTab());
470 while(i<=loops && result != NDBT_FAILED){
471
472 ndbout << "Loop " << i << "/"<< loops <<" started" << endl;
473 /*
474 1. Load data until db reports it's full
475 2. Restart cluster and verify records
476 */
477 ndbout << "Filling up table..." << endl;
478 CHECK(hugoTrans.fillTable(pNdb) == 0);
479 CHECK(utilTrans.selectCount(pNdb, 64, &count1) == 0);
480 ndbout << "Db is full. Table has "<<count1 <<" records."<< endl;
481
482 ndbout << "Restarting cluster" << endl;
483 CHECK(restarter.restartAll() == 0);
484 CHECK(restarter.waitClusterStarted(timeout) == 0);
485 CHECK(pNdb->waitUntilReady(timeout) == 0);
486
487 ndbout << "Verifying records..." << endl;
488 CHECK(hugoTrans.scanReadRecords(pNdb, count1) == 0);
489 CHECK(utilTrans.selectCount(pNdb, 64, &count2) == 0);
490 CHECK(count1 == count2);
491
492 ndbout << "Deleting all records..." << endl;
493 CHECK(utilTrans.clearTable2(pNdb, count1) == 0);
494
495 ndbout << "Restarting cluster..." << endl;
496 CHECK(restarter.restartAll() == 0);
497 CHECK(restarter.waitClusterStarted(timeout) == 0);
498 CHECK(pNdb->waitUntilReady(timeout) == 0);
499
500 ndbout << "Verifying records..." << endl;
501 CHECK(utilTrans.selectCount(pNdb, 64, &count1) == 0);
502 CHECK(count1 == 0);
503
504 i++;
505 }
506
507 ndbout << "runSystemRestartTestFullDb finished" << endl;
508
509 return result;
510 }
511
runSystemRestart3(NDBT_Context * ctx,NDBT_Step * step)512 int runSystemRestart3(NDBT_Context* ctx, NDBT_Step* step){
513 Ndb* pNdb = GETNDB(step);
514 int result = NDBT_OK;
515 int timeout = 300;
516 Uint32 loops = ctx->getNumLoops();
517 int records = ctx->getNumRecords();
518 int count;
519 NdbRestarter restarter;
520 Uint32 i = 1;
521
522 const Uint32 nodeCount = restarter.getNumDbNodes();
523 if(nodeCount < 2){
524 g_info << "SR3 - Needs atleast 2 nodes to test" << endl;
525 return NDBT_OK;
526 }
527
528 Vector<int> nodeIds;
529 for(i = 0; i<nodeCount; i++)
530 nodeIds.push_back(restarter.getDbNodeId(i));
531
532 Uint32 currentRestartNodeIndex = 0;
533 UtilTransactions utilTrans(*ctx->getTab());
534 HugoTransactions hugoTrans(*ctx->getTab());
535
536 while(i<=loops && result != NDBT_FAILED){
537
538 g_info << "Loop " << i << "/"<< loops <<" started" << endl;
539 /**
540 * 1. Load data
541 * 2. Restart 1 node -nostart
542 * 3. Update records
543 * 4. Restart cluster and verify records
544 * 5. Restart 1 node -nostart
545 * 6. Delete half of the records
546 * 7. Restart cluster and verify records
547 * 8. Restart 1 node -nostart
548 * 9. Delete all records
549 * 10. Restart cluster and verify records
550 */
551 g_info << "Loading records..." << endl;
552 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
553
554 /*** 1 ***/
555 g_info << "1 - Stopping one node" << endl;
556 CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
557 false,
558 true,
559 false) == 0);
560 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
561
562 g_info << "Updating records..." << endl;
563 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
564
565 g_info << "Restarting cluster..." << endl;
566 CHECK(restarter.restartAll() == 0);
567 CHECK(restarter.waitClusterStarted(timeout) == 0);
568 CHECK(pNdb->waitUntilReady(timeout) == 0);
569
570 g_info << "Verifying records..." << endl;
571 CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
572 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
573 CHECK(count == records);
574
575 g_info << "2 - Stopping one node" << endl;
576 CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
577 false,
578 true,
579 false) == 0);
580 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
581
582 g_info << "Deleting 50% of records..." << endl;
583 CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
584
585 g_info << "Restarting cluster..." << endl;
586 CHECK(restarter.restartAll() == 0);
587 CHECK(restarter.waitClusterStarted(timeout) == 0);
588 CHECK(pNdb->waitUntilReady(timeout) == 0);
589
590 g_info << "Verifying records..." << endl;
591 CHECK(hugoTrans.scanReadRecords(pNdb, records/2, 0, 64) == 0);
592 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
593 CHECK(count == (records/2));
594
595 g_info << "3 - Stopping one node" << endl;
596 CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
597 false,
598 true,
599 false) == 0);
600 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
601 g_info << "Deleting all records..." << endl;
602 CHECK(utilTrans.clearTable(pNdb, records/2) == 0);
603
604 g_info << "Restarting cluster..." << endl;
605 CHECK(restarter.restartAll() == 0);
606 CHECK(restarter.waitClusterStarted(timeout) == 0);
607 CHECK(pNdb->waitUntilReady(timeout) == 0);
608
609 ndbout << "Verifying records..." << endl;
610 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
611 CHECK(count == 0);
612
613 i++;
614 }
615
616 g_info << "runSystemRestart3 finished" << endl;
617
618 return result;
619 }
620
runSystemRestart4(NDBT_Context * ctx,NDBT_Step * step)621 int runSystemRestart4(NDBT_Context* ctx, NDBT_Step* step){
622 Ndb* pNdb = GETNDB(step);
623 int result = NDBT_OK;
624 int timeout = 300;
625 Uint32 loops = ctx->getNumLoops();
626 int records = ctx->getNumRecords();
627 int count;
628 NdbRestarter restarter;
629 Uint32 i = 1;
630
631 const Uint32 nodeCount = restarter.getNumDbNodes();
632 if(nodeCount < 2){
633 g_info << "SR4 - Needs atleast 2 nodes to test" << endl;
634 return NDBT_OK;
635 }
636
637 Vector<int> nodeIds;
638 for(i = 0; i<nodeCount; i++)
639 nodeIds.push_back(restarter.getDbNodeId(i));
640
641 Uint32 currentRestartNodeIndex = 0;
642 UtilTransactions utilTrans(*ctx->getTab());
643 HugoTransactions hugoTrans(*ctx->getTab());
644
645 {
646 int val = DumpStateOrd::DihMinTimeBetweenLCP;
647 if(restarter.dumpStateAllNodes(&val, 1) != 0){
648 g_err << "ERR: "<< step->getName()
649 << " failed on line " << __LINE__ << endl;
650 return NDBT_FAILED;
651 }
652 }
653
654 while(i<=loops && result != NDBT_FAILED){
655
656 g_info << "Loop " << i << "/"<< loops <<" started" << endl;
657 /**
658 * 1. Load data
659 * 2. Restart 1 node -nostart
660 * 3. Update records
661 * 4. Restart cluster and verify records
662 * 5. Restart 1 node -nostart
663 * 6. Delete half of the records
664 * 7. Restart cluster and verify records
665 * 8. Restart 1 node -nostart
666 * 9. Delete all records
667 * 10. Restart cluster and verify records
668 */
669 g_info << "Loading records..." << endl;
670 CHECK(hugoTrans.loadTable(pNdb, records) == 0);
671
672 /*** 1 ***/
673 g_info << "1 - Stopping one node" << endl;
674 CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
675 false,
676 true,
677 false) == 0);
678 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
679
680 g_info << "Updating records..." << endl;
681 CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
682
683 g_info << "Restarting cluster..." << endl;
684 CHECK(restarter.restartAll() == 0);
685 CHECK(restarter.waitClusterStarted(timeout) == 0);
686 {
687 int val = DumpStateOrd::DihMinTimeBetweenLCP;
688 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
689 }
690 CHECK(pNdb->waitUntilReady(timeout) == 0);
691
692 g_info << "Verifying records..." << endl;
693 CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
694 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
695 CHECK(count == records);
696
697 g_info << "2 - Stopping one node" << endl;
698 CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
699 false,
700 true,
701 false) == 0);
702 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
703
704 g_info << "Deleting 50% of records..." << endl;
705 CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
706
707 g_info << "Restarting cluster..." << endl;
708 CHECK(restarter.restartAll() == 0);
709 CHECK(restarter.waitClusterStarted(timeout) == 0);
710 {
711 int val = DumpStateOrd::DihMinTimeBetweenLCP;
712 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
713 }
714 CHECK(pNdb->waitUntilReady(timeout) == 0);
715
716 g_info << "Verifying records..." << endl;
717 CHECK(hugoTrans.scanReadRecords(pNdb, records/2, 0, 64) == 0);
718 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
719 CHECK(count == (records/2));
720
721 g_info << "3 - Stopping one node" << endl;
722 CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
723 false,
724 true,
725 false) == 0);
726 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
727 g_info << "Deleting all records..." << endl;
728 CHECK(utilTrans.clearTable(pNdb, records/2) == 0);
729
730 g_info << "Restarting cluster..." << endl;
731 CHECK(restarter.restartAll() == 0);
732 CHECK(restarter.waitClusterStarted(timeout) == 0);
733 {
734 int val = DumpStateOrd::DihMinTimeBetweenLCP;
735 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
736 }
737 CHECK(pNdb->waitUntilReady(timeout) == 0);
738
739 ndbout << "Verifying records..." << endl;
740 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
741 CHECK(count == 0);
742
743 i++;
744 }
745
746 g_info << "runSystemRestart4 finished" << endl;
747
748 return result;
749 }
750
runSystemRestart5(NDBT_Context * ctx,NDBT_Step * step)751 int runSystemRestart5(NDBT_Context* ctx, NDBT_Step* step){
752 Ndb* pNdb = GETNDB(step);
753 int result = NDBT_OK;
754 int timeout = 300;
755 Uint32 loops = ctx->getNumLoops();
756 int records = ctx->getNumRecords();
757 int count;
758 NdbRestarter restarter;
759 Uint32 i = 1;
760
761 const Uint32 nodeCount = restarter.getNumDbNodes();
762 if(nodeCount < 2){
763 g_info << "SR5 - Needs atleast 2 nodes to test" << endl;
764 return NDBT_OK;
765 }
766
767 Vector<int> nodeIds;
768 for(i = 0; i<nodeCount; i++)
769 nodeIds.push_back(restarter.getDbNodeId(i));
770
771 Uint32 currentRestartNodeIndex = 0;
772 UtilTransactions utilTrans(*ctx->getTab());
773 HugoTransactions hugoTrans(*ctx->getTab());
774
775 {
776 int val = DumpStateOrd::DihMinTimeBetweenLCP;
777 if(restarter.dumpStateAllNodes(&val, 1) != 0){
778 g_err << "ERR: "<< step->getName()
779 << " failed on line " << __LINE__ << endl;
780 return NDBT_FAILED;
781 }
782 }
783
784 while(i<=loops && result != NDBT_FAILED){
785
786 g_info << "Loop " << i << "/"<< loops <<" started" << endl;
787 /**
788 * 1. Load data
789 * 2. Restart 1 node -nostart
790 * 3. Update records
791 * 4. Restart cluster and verify records
792 * 5. Restart 1 node -nostart
793 * 6. Delete half of the records
794 * 7. Restart cluster and verify records
795 * 8. Restart 1 node -nostart
796 * 9. Delete all records
797 * 10. Restart cluster and verify records
798 */
799 g_info << "Loading records..." << endl;
800 hugoTrans.loadTable(pNdb, records);
801
802 /*** 1 ***/
803 g_info << "1 - Stopping one node" << endl;
804 CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
805 false,
806 true,
807 false) == 0);
808 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
809
810 g_info << "Updating records..." << endl;
811 hugoTrans.pkUpdateRecords(pNdb, records);
812
813 g_info << "Restarting cluster..." << endl;
814 CHECK(restarter.restartAll(false, false, true) == 0);
815 CHECK(restarter.waitClusterStarted(timeout) == 0);
816 {
817 int val = DumpStateOrd::DihMinTimeBetweenLCP;
818 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
819 }
820 CHECK(pNdb->waitUntilReady(timeout) == 0);
821
822 g_info << "Verifying records..." << endl;
823 hugoTrans.pkReadRecords(pNdb, records);
824 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
825 //CHECK(count == records);
826
827 g_info << "2 - Stopping one node" << endl;
828 CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
829 false,
830 true,
831 false) == 0);
832 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
833
834 g_info << "Deleting 50% of records..." << endl;
835 hugoTrans.pkDelRecords(pNdb, records/2);
836
837 g_info << "Restarting cluster..." << endl;
838 CHECK(restarter.restartAll(false, false, true) == 0);
839 CHECK(restarter.waitClusterStarted(timeout) == 0);
840 {
841 int val = DumpStateOrd::DihMinTimeBetweenLCP;
842 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
843 }
844 CHECK(pNdb->waitUntilReady(timeout) == 0);
845
846 g_info << "Verifying records..." << endl;
847 hugoTrans.scanReadRecords(pNdb, records/2, 0, 64);
848 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
849 //CHECK(count == (records/2));
850
851 g_info << "3 - Stopping one node" << endl;
852 CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
853 false,
854 true,
855 false) == 0);
856 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
857 g_info << "Deleting all records..." << endl;
858 utilTrans.clearTable(pNdb, records/2);
859
860 g_info << "Restarting cluster..." << endl;
861 CHECK(restarter.restartAll(false, false, true) == 0);
862 CHECK(restarter.waitClusterStarted(timeout) == 0);
863 {
864 int val = DumpStateOrd::DihMinTimeBetweenLCP;
865 CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
866 }
867 CHECK(pNdb->waitUntilReady(timeout) == 0);
868
869 ndbout << "Verifying records..." << endl;
870 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
871 //CHECK(count == 0);
872
873 CHECK(utilTrans.clearTable(pNdb) == 0);
874 i++;
875 }
876
877 g_info << "runSystemRestart5 finished" << endl;
878
879 return result;
880 }
881
runSystemRestart6(NDBT_Context * ctx,NDBT_Step * step)882 int runSystemRestart6(NDBT_Context* ctx, NDBT_Step* step){
883 Ndb* pNdb = GETNDB(step);
884 int result = NDBT_OK;
885 int timeout = 300;
886 Uint32 loops = ctx->getNumLoops();
887 int records = ctx->getNumRecords();
888 NdbRestarter restarter;
889 Uint32 i = 1;
890
891 const Uint32 nodeCount = restarter.getNumDbNodes();
892 if(nodeCount < 2){
893 g_info << "SR6 - Needs atleast 2 nodes to test" << endl;
894 return NDBT_OK;
895 }
896
897 Vector<int> nodeIds;
898 for(i = 0; i<nodeCount; i++)
899 nodeIds.push_back(restarter.getDbNodeId(i));
900
901 Uint32 currentRestartNodeIndex = 0;
902 UtilTransactions utilTrans(*ctx->getTab());
903 HugoTransactions hugoTrans(*ctx->getTab());
904
905 while(i<=loops && result != NDBT_FAILED){
906
907 g_info << "Loop " << i << "/"<< loops <<" started" << endl;
908 /**
909 * 1. Load data
910 * 2. Restart all node -nostart
911 * 3. Restart some nodes -i -nostart
912 * 4. Start all nodes verify records
913 */
914 g_info << "Loading records..." << endl;
915 hugoTrans.loadTable(pNdb, records);
916
917 CHECK(restarter.restartAll(false, true, false) == 0);
918
919 Uint32 nodeId = nodeIds[currentRestartNodeIndex];
920 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
921
922 CHECK(restarter.restartOneDbNode(nodeId, true, true,false) == 0);
923 CHECK(restarter.waitClusterNoStart(timeout) == 0);
924 CHECK(restarter.startAll() == 0);
925 CHECK(restarter.waitClusterStarted(timeout) == 0);
926 CHECK(pNdb->waitUntilReady(timeout) == 0);
927 int count = records - 1;
928 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
929 CHECK(count == records);
930 CHECK(utilTrans.clearTable(pNdb) == 0);
931 i++;
932 }
933
934 g_info << "runSystemRestart6 finished" << endl;
935
936 return result;
937 }
938
runSystemRestart7(NDBT_Context * ctx,NDBT_Step * step)939 int runSystemRestart7(NDBT_Context* ctx, NDBT_Step* step){
940 Ndb* pNdb = GETNDB(step);
941 int result = NDBT_OK;
942 Uint32 loops = ctx->getNumLoops();
943 int records = ctx->getNumRecords();
944 NdbRestarter restarter;
945 Uint32 i = 1;
946
947 const Uint32 nodeCount = restarter.getNumDbNodes();
948 if(nodeCount < 2){
949 g_info << "SR7 - Needs atleast 2 nodes to test" << endl;
950 return NDBT_OK;
951 }
952
953 Vector<int> nodeIds;
954 for(i = 0; i<nodeCount; i++)
955 nodeIds.push_back(restarter.getDbNodeId(i));
956
957 int a_nodeIds[64];
958 if(nodeCount > 64)
959 abort();
960
961 Uint32 currentRestartNodeIndex = 1;
962 UtilTransactions utilTrans(*ctx->getTab());
963 HugoTransactions hugoTrans(*ctx->getTab());
964
965 while(i<=loops && result != NDBT_FAILED){
966
967 g_info << "Loop " << i << "/"<< loops <<" started" << endl;
968 /**
969 * 1. Load data
970 * 2. Restart all node -nostart
971 * 3. Start all but one node
972 * 4. Wait for startphase >= 2
973 * 5. Start last node
974 * 6. Verify records
975 */
976 g_info << "Loading records..." << endl;
977 hugoTrans.loadTable(pNdb, records);
978
979 CHECK(restarter.restartAll(false, true, false) == 0);
980
981 int nodeId = nodeIds[currentRestartNodeIndex];
982 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
983
984 Uint32 j = 0;
985 for(Uint32 k = 0; k<nodeCount; k++){
986 if(nodeIds[k] != nodeId){
987 a_nodeIds[j++] = nodeIds[k];
988 }
989 }
990
991 CHECK(restarter.startNodes(a_nodeIds, nodeCount - 1) == 0);
992 CHECK(restarter.waitNodesStarted(a_nodeIds, nodeCount - 1, 120) == 0);
993 CHECK(pNdb->waitUntilReady(5) == 0);
994 int count = records - 1;
995 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
996 CHECK(count == records);
997
998 CHECK(restarter.startNodes(&nodeId, 1) == 0);
999 CHECK(restarter.waitNodesStarted(&nodeId, 1, 120) == 0);
1000
1001 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
1002 CHECK(count == records);
1003 CHECK(utilTrans.clearTable(pNdb) == 0);
1004
1005 i++;
1006 }
1007
1008 g_info << "runSystemRestart7 finished" << endl;
1009
1010 return result;
1011 }
1012
runSystemRestart8(NDBT_Context * ctx,NDBT_Step * step)1013 int runSystemRestart8(NDBT_Context* ctx, NDBT_Step* step){
1014 Ndb* pNdb = GETNDB(step);
1015 int result = NDBT_OK;
1016 int timeout = 300;
1017 Uint32 loops = ctx->getNumLoops();
1018 int records = ctx->getNumRecords();
1019 NdbRestarter restarter;
1020 Uint32 i = 1;
1021
1022 const Uint32 nodeCount = restarter.getNumDbNodes();
1023 if(nodeCount < 2){
1024 g_info << "SR8 - Needs atleast 2 nodes to test" << endl;
1025 return NDBT_OK;
1026 }
1027
1028 Vector<int> nodeIds;
1029 for(i = 0; i<nodeCount; i++)
1030 nodeIds.push_back(restarter.getDbNodeId(i));
1031
1032 int a_nodeIds[64];
1033 if(nodeCount > 64)
1034 abort();
1035
1036 Uint32 currentRestartNodeIndex = 1;
1037 UtilTransactions utilTrans(*ctx->getTab());
1038 HugoTransactions hugoTrans(*ctx->getTab());
1039
1040 while(i<=loops && result != NDBT_FAILED){
1041
1042 g_info << "Loop " << i << "/"<< loops <<" started" << endl;
1043 /**
1044 * 1. Load data
1045 * 2. Restart all node -nostart
1046 * 3. Start all but one node
1047 * 4. Verify records
1048 * 5. Start last node
1049 * 6. Verify records
1050 */
1051 g_info << "Loading records..." << endl;
1052 hugoTrans.loadTable(pNdb, records);
1053
1054 CHECK(restarter.restartAll(false, true, false) == 0);
1055
1056 int nodeId = nodeIds[currentRestartNodeIndex];
1057 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
1058
1059 Uint32 j = 0;
1060 for(Uint32 k = 0; k<nodeCount; k++){
1061 if(nodeIds[k] != nodeId){
1062 a_nodeIds[j++] = nodeIds[k];
1063 }
1064 }
1065
1066 CHECK(restarter.startNodes(a_nodeIds, nodeCount-1) == 0);
1067 CHECK(restarter.waitNodesStartPhase(a_nodeIds, nodeCount-1, 3, 120) == 0);
1068 CHECK(restarter.startNodes(&nodeId, 1) == 0);
1069 CHECK(restarter.waitClusterStarted(timeout) == 0);
1070 CHECK(pNdb->waitUntilReady() == 0);
1071
1072 int count = records - 1;
1073 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
1074 CHECK(count == records);
1075 CHECK(utilTrans.clearTable(pNdb) == 0);
1076 i++;
1077 }
1078
1079 g_info << "runSystemRestart8 finished" << endl;
1080
1081 return result;
1082 }
1083
runSystemRestart9(NDBT_Context * ctx,NDBT_Step * step)1084 int runSystemRestart9(NDBT_Context* ctx, NDBT_Step* step){
1085 Ndb* pNdb = GETNDB(step);
1086 int result = NDBT_OK;
1087 int timeout = 300;
1088 NdbRestarter restarter;
1089 Uint32 i = 1;
1090
1091 UtilTransactions utilTrans(*ctx->getTab());
1092 HugoTransactions hugoTrans(*ctx->getTab());
1093
1094 int args[] = { DumpStateOrd::DihMaxTimeBetweenLCP };
1095 int dump[] = { DumpStateOrd::DihStartLcpImmediately };
1096
1097 do {
1098 CHECK(restarter.dumpStateAllNodes(args, 1) == 0);
1099
1100 HugoOperations ops(* ctx->getTab());
1101 CHECK(ops.startTransaction(pNdb) == 0);
1102 for(i = 0; i<10; i++){
1103 CHECK(ops.pkInsertRecord(pNdb, i, 1, 1) == 0);
1104 CHECK(ops.execute_NoCommit(pNdb) == 0);
1105 }
1106 for(i = 0; i<10; i++){
1107 CHECK(ops.pkUpdateRecord(pNdb, i, 1) == 0);
1108 CHECK(ops.execute_NoCommit(pNdb) == 0);
1109 }
1110 NdbSleep_SecSleep(10);
1111 CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
1112 NdbSleep_SecSleep(10);
1113 CHECK(ops.execute_Commit(pNdb) == 0);
1114
1115 CHECK(restarter.restartAll() == 0);
1116 CHECK(restarter.waitClusterStarted(timeout) == 0);
1117 CHECK(pNdb->waitUntilReady(timeout) == 0);
1118 ops.closeTransaction(pNdb);
1119 } while(0);
1120
1121 g_info << "runSystemRestart9 finished" << endl;
1122
1123 return result;
1124 }
1125
runSystemRestart10(NDBT_Context * ctx,NDBT_Step * step)1126 int runSystemRestart10(NDBT_Context* ctx, NDBT_Step* step)
1127 {
1128 Ndb* pNdb = GETNDB(step);
1129 int result = NDBT_OK;
1130 //Uint32 loops = ctx->getNumLoops();
1131 Uint32 loops = 3;
1132 int records = ctx->getNumRecords();
1133 NdbRestarter restarter;
1134 Uint32 i = 1;
1135
1136 const Uint32 nodeCount = restarter.getNumDbNodes();
1137 if(nodeCount < 4){
1138 g_info << "SR10 - Needs atleast 4 nodes to test" << endl;
1139 return NDBT_OK;
1140 }
1141
1142 Vector<int> nodeIds;
1143 for(i = 0; i<nodeCount; i++)
1144 nodeIds.push_back(restarter.getDbNodeId(i));
1145
1146 int a_nodeIds[64];
1147 if(nodeCount > 64)
1148 abort();
1149
1150 UtilTransactions utilTrans(*ctx->getTab());
1151 HugoTransactions hugoTrans(*ctx->getTab());
1152
1153 i = 1;
1154 while(i < loops && result != NDBT_FAILED){
1155
1156 g_info << "Loop " << i << "/"<< loops <<" started" << endl;
1157 /**
1158 * 1. Load data
1159 * 2. Stop one node X (restart -nostart)
1160 * 3. Wait 10 seconds to ensure some GCPs are executed.
1161 * 4. Stop the rest of the nodes
1162 * 5. Start all nodes, but insert an error into the 2nd
1163 * node to prevent it from passing phase 3 for 10
1164 * seconds. The cluster should wait for these 10
1165 * seconds, it cannot proceed at this point without
1166 * it. If it tries to start without it, there will
1167 * be a crash of the system restart.
1168 * 6. Verify records
1169 */
1170
1171 g_info << "Loading records..." << endl;
1172 hugoTrans.loadTable(pNdb, records);
1173
1174 Uint32 j = 0;
1175 for(Uint32 k = 0; k<nodeCount; k++)
1176 {
1177 a_nodeIds[j++] = nodeIds[k];
1178 }
1179
1180 g_info << "Stop 2nd last node" << endl;
1181 CHECK(restarter.restartOneDbNode(a_nodeIds[nodeCount - 2],
1182 false,
1183 true,
1184 false) == 0);
1185
1186 NdbSleep_SecSleep(10);
1187 g_info << "Stop rest of the nodes" << endl;
1188 CHECK(restarter.restartAll(false, true, false) == 0);
1189
1190 int nodeId = a_nodeIds[nodeCount - 1];
1191
1192 if (i == 0)
1193 {
1194 g_info << "Inject Error 1021 into last node to stop it in phase 1" << endl;
1195 CHECK(restarter.insertErrorInNode(nodeId, 1021) == 0);
1196 }
1197 else if (i == 1)
1198 {
1199 g_info << "Inject Error 1010 into last node to stop it in phase 4" << endl;
1200 CHECK(restarter.insertErrorInNode(nodeId, 1010) == 0);
1201 }
1202 if (i == 2)
1203 {
1204 g_info << "Start all nodes except the last node" << endl;
1205 CHECK(restarter.startNodes(a_nodeIds, nodeCount - 1) == 0);
1206 g_info << "Wait for those nodes to start, expect failure" << endl;
1207 CHECK(restarter.waitNodesStarted(a_nodeIds, nodeCount - 1, 30) != 0);
1208 g_info << "Start the last node" << endl;
1209 CHECK(restarter.startNodes(&nodeId, 1) == 0);
1210 g_info << "Wait for cluster to be started" << endl;
1211 CHECK(restarter.waitNodesStarted(a_nodeIds, nodeCount, 120) == 0);
1212 }
1213 else
1214 {
1215 CHECK(restarter.startNodes(a_nodeIds, nodeCount) == 0);
1216 g_info << "Wait for cluster to be started" << endl;
1217 CHECK(restarter.waitNodesStarted(a_nodeIds, nodeCount, 120) == 0);
1218 }
1219 g_info << "Perform consistency checks" << endl;
1220 CHECK(pNdb->waitUntilReady(5) == 0);
1221 int count = records - 1;
1222 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
1223 CHECK(count == records);
1224
1225 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
1226 CHECK(count == records);
1227 CHECK(utilTrans.clearTable(pNdb) == 0);
1228
1229 i++;
1230 }
1231
1232 g_info << "runSystemRestart10 finished" << endl;
1233
1234 return result;
1235 }
1236
runBug18385(NDBT_Context * ctx,NDBT_Step * step)1237 int runBug18385(NDBT_Context* ctx, NDBT_Step* step){
1238 NdbRestarter restarter;
1239 const Uint32 nodeCount = restarter.getNumDbNodes();
1240 if(nodeCount < 2){
1241 g_info << "Bug18385 - Needs atleast 2 nodes to test" << endl;
1242 return NDBT_OK;
1243 }
1244
1245 int node1 = restarter.getDbNodeId(rand() % nodeCount);
1246 int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
1247
1248 if (node1 == -1 || node2 == -1)
1249 return NDBT_OK;
1250
1251 int dump[] = { DumpStateOrd::DihSetTimeBetweenGcp, 300 };
1252
1253 int result = NDBT_OK;
1254 do {
1255 CHECK(restarter.dumpStateAllNodes(dump, 2) == 0);
1256 CHECK(restarter.restartOneDbNode(node1, false, true, false) == 0);
1257 NdbSleep_SecSleep(3);
1258 CHECK(restarter.restartAll(false, true, false) == 0);
1259
1260 Uint32 cnt = 0;
1261 int nodes[128];
1262 for(Uint32 i = 0; i<nodeCount; i++)
1263 if ((nodes[cnt] = restarter.getDbNodeId(i)) != node2)
1264 cnt++;
1265
1266 require(cnt == nodeCount - 1);
1267
1268 CHECK(restarter.startNodes(nodes, cnt) == 0);
1269 CHECK(restarter.waitNodesStarted(nodes, cnt, 300) == 0);
1270
1271 CHECK(restarter.insertErrorInNode(node2, 7170) == 0);
1272 CHECK(restarter.waitNodesNoStart(&node2, 1) == 0);
1273 CHECK(restarter.restartOneDbNode(node2, true, false, true) == 0);
1274 CHECK(restarter.waitNodesStarted(&node2, 1) == 0);
1275
1276 } while(0);
1277
1278 g_info << "Bug18385 finished" << endl;
1279
1280 return result;
1281 }
1282
runWaitStarted(NDBT_Context * ctx,NDBT_Step * step)1283 int runWaitStarted(NDBT_Context* ctx, NDBT_Step* step){
1284
1285 NdbRestarter restarter;
1286 restarter.waitClusterStarted(300);
1287
1288 NdbSleep_SecSleep(3);
1289 return NDBT_OK;
1290 }
1291
runClearTable(NDBT_Context * ctx,NDBT_Step * step)1292 int runClearTable(NDBT_Context* ctx, NDBT_Step* step){
1293 int records = ctx->getNumRecords();
1294
1295 Ndb* pNdb = GETNDB(step);
1296 if(pNdb->waitUntilReady(5) != 0){
1297 return NDBT_FAILED;
1298 }
1299
1300 UtilTransactions utilTrans(*ctx->getTab());
1301 if (utilTrans.clearTable2(pNdb, records) != 0){
1302 return NDBT_FAILED;
1303 }
1304 return NDBT_OK;
1305 }
1306
1307 int
runBug21536(NDBT_Context * ctx,NDBT_Step * step)1308 runBug21536(NDBT_Context* ctx, NDBT_Step* step)
1309 {
1310 NdbRestarter restarter;
1311 const Uint32 nodeCount = restarter.getNumDbNodes();
1312 if(nodeCount != 2){
1313 g_info << "Bug21536 - 2 nodes to test" << endl;
1314 return NDBT_OK;
1315 }
1316
1317 int node1 = restarter.getDbNodeId(rand() % nodeCount);
1318 int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
1319
1320 if (node1 == -1 || node2 == -1)
1321 return NDBT_OK;
1322
1323 int result = NDBT_OK;
1324 do {
1325 CHECK(restarter.restartOneDbNode(node1, false, true, true) == 0);
1326 CHECK(restarter.waitNodesNoStart(&node1, 1) == 0);
1327 CHECK(restarter.insertErrorInNode(node1, 1000) == 0);
1328 int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
1329 CHECK(restarter.dumpStateOneNode(node1, val2, 2) == 0);
1330 CHECK(restarter.startNodes(&node1, 1) == 0);
1331 restarter.waitNodesStartPhase(&node1, 1, 3, 120);
1332 CHECK(restarter.waitNodesNoStart(&node1, 1) == 0);
1333
1334 CHECK(restarter.restartOneDbNode(node2, true, true, true) == 0);
1335 CHECK(restarter.waitNodesNoStart(&node2, 1) == 0);
1336 CHECK(restarter.startNodes(&node1, 1) == 0);
1337 CHECK(restarter.waitNodesStarted(&node1, 1) == 0);
1338 CHECK(restarter.startNodes(&node2, 1) == 0);
1339 CHECK(restarter.waitClusterStarted() == 0);
1340
1341 } while(0);
1342
1343 g_info << "Bug21536 finished" << endl;
1344
1345 return result;
1346 }
1347
1348 int
runBug24664(NDBT_Context * ctx,NDBT_Step * step)1349 runBug24664(NDBT_Context* ctx, NDBT_Step* step)
1350 {
1351 int result = NDBT_OK;
1352 NdbRestarter restarter;
1353 Ndb* pNdb = GETNDB(step);
1354
1355 int records = ctx->getNumRecords();
1356 UtilTransactions utilTrans(*ctx->getTab());
1357 HugoTransactions hugoTrans(*ctx->getTab());
1358
1359 int args[] = { DumpStateOrd::DihMaxTimeBetweenLCP };
1360 int dump[] = { DumpStateOrd::DihStartLcpImmediately };
1361
1362 restarter.getNumDbNodes();
1363 int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
1364 NdbLogEventHandle handle =
1365 ndb_mgm_create_logevent_handle(restarter.handle, filter);
1366
1367 struct ndb_logevent event;
1368
1369 do {
1370 CHECK(restarter.dumpStateAllNodes(args, 1) == 0);
1371 CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
1372 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1373 event.type != NDB_LE_LocalCheckpointStarted);
1374 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1375 event.type != NDB_LE_LocalCheckpointCompleted);
1376
1377 if (hugoTrans.loadTable(GETNDB(step), records) != 0){
1378 return NDBT_FAILED;
1379 }
1380
1381 restarter.insertErrorInAllNodes(10039); // Hang LCP
1382 CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
1383 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1384 event.type != NDB_LE_LocalCheckpointStarted);
1385 NdbSleep_SecSleep(3);
1386 CHECK(utilTrans.clearTable(pNdb, records) == 0);
1387 if (hugoTrans.loadTable(GETNDB(step), records) != 0){
1388 return NDBT_FAILED;
1389 }
1390
1391 restarter.insertErrorInAllNodes(10040); // Resume LCP
1392 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1393 event.type != NDB_LE_LocalCheckpointCompleted);
1394
1395 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1396 event.type != NDB_LE_GlobalCheckpointCompleted);
1397 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1398 event.type != NDB_LE_GlobalCheckpointCompleted);
1399 restarter.restartAll(false, false, true);
1400 CHECK(restarter.waitClusterStarted() == 0);
1401 } while(false);
1402
1403 return result;
1404 }
1405
1406 int
runBug27434(NDBT_Context * ctx,NDBT_Step * step)1407 runBug27434(NDBT_Context* ctx, NDBT_Step* step)
1408 {
1409 int result = NDBT_OK;
1410 NdbRestarter restarter;
1411 const Uint32 nodeCount = restarter.getNumDbNodes();
1412
1413 if (nodeCount < 2)
1414 return NDBT_OK;
1415
1416 int args[] = { DumpStateOrd::DihMaxTimeBetweenLCP };
1417 int dump[] = { DumpStateOrd::DihStartLcpImmediately };
1418
1419 int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
1420 NdbLogEventHandle handle =
1421 ndb_mgm_create_logevent_handle(restarter.handle, filter);
1422
1423 struct ndb_logevent event;
1424
1425 do {
1426 int node1 = restarter.getDbNodeId(rand() % nodeCount);
1427 CHECK(restarter.restartOneDbNode(node1, false, true, true) == 0);
1428 NdbSleep_SecSleep(3);
1429 CHECK(restarter.waitNodesNoStart(&node1, 1) == 0);
1430
1431 CHECK(restarter.dumpStateAllNodes(args, 1) == 0);
1432
1433 for (Uint32 i = 0; i<3; i++)
1434 {
1435 CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
1436 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1437 event.type != NDB_LE_LocalCheckpointStarted);
1438 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1439 event.type != NDB_LE_LocalCheckpointCompleted);
1440 }
1441
1442 restarter.restartAll(false, true, true);
1443 NdbSleep_SecSleep(3);
1444 CHECK(restarter.waitClusterNoStart() == 0);
1445 restarter.insertErrorInNode(node1, 5046);
1446 restarter.startAll();
1447 CHECK(restarter.waitClusterStarted() == 0);
1448 } while(false);
1449
1450 return result;
1451 }
1452
1453 int
runBug29167(NDBT_Context * ctx,NDBT_Step * step)1454 runBug29167(NDBT_Context* ctx, NDBT_Step* step)
1455 {
1456 int result = NDBT_OK;
1457 NdbRestarter restarter;
1458 const Uint32 nodeCount = restarter.getNumDbNodes();
1459
1460 if (nodeCount < 4)
1461 return NDBT_OK;
1462
1463 struct ndb_logevent event;
1464 int master = restarter.getMasterNodeId();
1465 do {
1466 int node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
1467 int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
1468
1469 ndbout_c("node1: %u node2: %u", node1, node2);
1470
1471 int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
1472 restarter.dumpStateAllNodes(val2, 2);
1473 int dump[] = { DumpStateOrd::DihSetTimeBetweenGcp, 30000 };
1474 restarter.dumpStateAllNodes(dump, 2);
1475
1476 int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
1477 NdbLogEventHandle handle =
1478 ndb_mgm_create_logevent_handle(restarter.handle, filter);
1479
1480 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1481 event.type != NDB_LE_GlobalCheckpointCompleted);
1482
1483 ndb_mgm_destroy_logevent_handle(&handle);
1484
1485 CHECK(restarter.insertErrorInAllNodes(932) == 0);
1486
1487 CHECK(restarter.insertErrorInNode(node1, 7183) == 0);
1488 CHECK(restarter.insertErrorInNode(node2, 7183) == 0);
1489
1490 CHECK(restarter.waitClusterNoStart() == 0);
1491 restarter.startAll();
1492 CHECK(restarter.waitClusterStarted() == 0);
1493 } while(false);
1494
1495 return result;
1496 }
1497 int
runBug28770(NDBT_Context * ctx,NDBT_Step * step)1498 runBug28770(NDBT_Context* ctx, NDBT_Step* step) {
1499 Ndb* pNdb = GETNDB(step);
1500 NdbRestarter restarter;
1501 int result = NDBT_OK;
1502 int count = 0;
1503 Uint32 i = 0;
1504 Uint32 loops = ctx->getNumLoops();
1505 int records = ctx->getNumRecords();
1506 UtilTransactions utilTrans(*ctx->getTab());
1507 HugoTransactions hugoTrans(*ctx->getTab());
1508
1509 g_info << "Loading records..." << endl; hugoTrans.loadTable(pNdb,
1510 records);
1511
1512
1513 while(i<=loops && result != NDBT_FAILED)
1514 {
1515 g_info << "Loop " << i << "/"<< loops <<" started" << endl;
1516 if (i == 0)
1517 {
1518 CHECK(restarter.restartAll(false, true, false) == 0); // graceful
1519 }
1520 else
1521 {
1522 CHECK(restarter.restartAll(false, true, true) == 0); // abort
1523 }
1524 CHECK(restarter.waitClusterNoStart() == 0);
1525 restarter.insertErrorInAllNodes(6024);
1526 CHECK(restarter.startAll()== 0);
1527 CHECK(restarter.waitClusterStarted() == 0);
1528 CHECK(pNdb->waitUntilReady() == 0);
1529 CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
1530 CHECK(count == records);
1531 i++;
1532 }
1533 ndbout << " runBug28770 finished" << endl;
1534 return result;
1535 }
1536
1537 int
runStopper(NDBT_Context * ctx,NDBT_Step * step)1538 runStopper(NDBT_Context* ctx, NDBT_Step* step)
1539 {
1540 NdbRestarter restarter;
1541 Uint32 stop = 0;
1542 loop:
1543 while (!ctx->isTestStopped() &&
1544 ((stop = ctx->getProperty("StopAbort", Uint32(0))) == 0))
1545 {
1546 NdbSleep_MilliSleep(30);
1547 }
1548
1549 if (ctx->isTestStopped())
1550 {
1551 return NDBT_OK;
1552 }
1553
1554 ctx->setProperty("StopAbort", Uint32(0));
1555
1556 ndbout << "Killing in " << stop << "ms..." << flush;
1557 NdbSleep_MilliSleep(stop);
1558 restarter.restartAll(false, true, true);
1559 goto loop;
1560 }
1561
runSR_DD_1(NDBT_Context * ctx,NDBT_Step * step)1562 int runSR_DD_1(NDBT_Context* ctx, NDBT_Step* step)
1563 {
1564 Ndb* pNdb = GETNDB(step);
1565 int result = NDBT_OK;
1566 Uint32 loops = ctx->getNumLoops();
1567 NdbRestarter restarter;
1568 NdbBackup backup;
1569 bool lcploop = ctx->getProperty("LCP", (unsigned)0);
1570 bool all = ctx->getProperty("ALL", (unsigned)0);
1571
1572 Uint32 i = 1;
1573
1574 int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
1575 int lcp = DumpStateOrd::DihMinTimeBetweenLCP;
1576
1577 int startFrom = 0;
1578
1579 HugoTransactions hugoTrans(*ctx->getTab());
1580 while(i<=loops && result != NDBT_FAILED)
1581 {
1582 if (i > 0 && ctx->closeToTimeout(30))
1583 break;
1584
1585 if (lcploop)
1586 {
1587 CHECK(restarter.dumpStateAllNodes(&lcp, 1) == 0);
1588 }
1589
1590 int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
1591 //CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
1592
1593 ndbout << "Loop " << i << "/"<< loops <<" started" << endl;
1594 ndbout << "Loading records..." << startFrom << endl;
1595 CHECK(hugoTrans.loadTable(pNdb, startFrom) == 0);
1596
1597 if (!all)
1598 {
1599 ndbout << "Making " << nodeId << " crash" << endl;
1600 int kill[] = { 9999, 1000, 3000 };
1601 CHECK(restarter.dumpStateOneNode(nodeId, val, 2) == 0);
1602 CHECK(restarter.dumpStateOneNode(nodeId, kill, 3) == 0);
1603 }
1604 else
1605 {
1606 ndbout << "Crashing cluster" << endl;
1607 ctx->setProperty("StopAbort", 1000 + rand() % (3000 - 1000));
1608 }
1609 Uint64 end = NdbTick_CurrentMillisecond() + 4000;
1610 Uint32 row = startFrom;
1611 do {
1612 ndbout << "Loading from " << row << " to " << row + 1000 << endl;
1613 if (hugoTrans.loadTableStartFrom(pNdb, row, 1000) != 0)
1614 break;
1615 row += 1000;
1616 } while (NdbTick_CurrentMillisecond() < end);
1617
1618 if (!all)
1619 {
1620 ndbout << "Waiting for " << nodeId << " to restart" << endl;
1621 CHECK(restarter.waitNodesNoStart(&nodeId, 1) == 0);
1622 ndbout << "Restarting cluster" << endl;
1623 CHECK(restarter.restartAll(false, true, true) == 0);
1624 }
1625 else
1626 {
1627 ndbout << "Waiting for cluster to restart" << endl;
1628 }
1629 CHECK(restarter.waitClusterNoStart() == 0);
1630 CHECK(restarter.startAll() == 0);
1631 CHECK(restarter.waitClusterStarted() == 0);
1632 CHECK(pNdb->waitUntilReady() == 0);
1633
1634 ndbout << "Starting backup..." << flush;
1635 CHECK(backup.start() == 0);
1636 ndbout << "done" << endl;
1637
1638 int cnt = 0;
1639 CHECK(hugoTrans.selectCount(pNdb, 0, &cnt) == 0);
1640 ndbout << "Found " << cnt << " records..." << endl;
1641 ndbout << "Updating..." << endl;
1642 CHECK(hugoTrans.scanUpdateRecords(pNdb,
1643 NdbScanOperation::SF_TupScan, cnt) == 0
1644 || hugoTrans.getRetryMaxReached());
1645 ndbout << "Clearing..." << endl;
1646 CHECK(hugoTrans.clearTable(pNdb,
1647 NdbScanOperation::SF_TupScan, cnt) == 0);
1648
1649 if (cnt > startFrom)
1650 {
1651 startFrom = cnt;
1652 }
1653 startFrom += 1000;
1654 i++;
1655 }
1656
1657 ndbout << "runSR_DD_1 finished" << endl;
1658 ctx->stopTest();
1659 return result;
1660 }
1661
runSR_DD_2(NDBT_Context * ctx,NDBT_Step * step)1662 int runSR_DD_2(NDBT_Context* ctx, NDBT_Step* step)
1663 {
1664 Ndb* pNdb = GETNDB(step);
1665 int result = NDBT_OK;
1666 Uint32 loops = ctx->getNumLoops();
1667 Uint32 rows = ctx->getNumRecords();
1668 NdbRestarter restarter;
1669 NdbBackup backup;
1670 bool lcploop = ctx->getProperty("LCP", (unsigned)0);
1671 bool all = ctx->getProperty("ALL", (unsigned)0);
1672 int error = (int)ctx->getProperty("ERROR", (unsigned)0);
1673 rows = ctx->getProperty("ROWS", rows);
1674
1675 Uint32 i = 1;
1676
1677 int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
1678 int lcp = DumpStateOrd::DihMinTimeBetweenLCP;
1679
1680 if (error)
1681 {
1682 restarter.insertErrorInAllNodes(error);
1683 }
1684
1685 HugoTransactions hugoTrans(*ctx->getTab());
1686 while(i<=loops && result != NDBT_FAILED)
1687 {
1688 if (i > 0 && ctx->closeToTimeout(30))
1689 break;
1690
1691 if (lcploop)
1692 {
1693 CHECK(restarter.dumpStateAllNodes(&lcp, 1) == 0);
1694 }
1695
1696 int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
1697
1698 if (!all)
1699 {
1700 ndbout << "Making " << nodeId << " crash" << endl;
1701 int kill[] = { 9999, 3000, 10000 };
1702 CHECK(restarter.dumpStateOneNode(nodeId, val, 2) == 0);
1703 CHECK(restarter.dumpStateOneNode(nodeId, kill, 3) == 0);
1704 }
1705 else
1706 {
1707 ndbout << "Crashing cluster" << endl;
1708 ctx->setProperty("StopAbort", 3000 + rand() % (10000 - 3000));
1709 }
1710
1711 Uint64 end = NdbTick_CurrentMillisecond() + 11000;
1712 do {
1713 if (hugoTrans.loadTable(pNdb, rows) != 0)
1714 break;
1715
1716 if (hugoTrans.clearTable(pNdb, NdbScanOperation::SF_TupScan, rows) != 0)
1717 break;
1718 } while (NdbTick_CurrentMillisecond() < end);
1719
1720 if (!all)
1721 {
1722 ndbout << "Waiting for " << nodeId << " to restart" << endl;
1723 CHECK(restarter.waitNodesNoStart(&nodeId, 1) == 0);
1724 ndbout << "Restarting cluster" << endl;
1725 CHECK(restarter.restartAll(false, true, true) == 0);
1726 }
1727 else
1728 {
1729 ndbout << "Waiting for cluster to restart" << endl;
1730 }
1731
1732 CHECK(restarter.waitClusterNoStart() == 0);
1733 CHECK(restarter.startAll() == 0);
1734 CHECK(restarter.waitClusterStarted() == 0);
1735 CHECK(pNdb->waitUntilReady() == 0);
1736
1737 if (error)
1738 {
1739 restarter.insertErrorInAllNodes(error);
1740 }
1741
1742 ndbout << "Starting backup..." << flush;
1743 CHECK(backup.start() == 0);
1744 ndbout << "done" << endl;
1745
1746 int cnt = 0;
1747 CHECK(hugoTrans.selectCount(pNdb, 0, &cnt) == 0);
1748 ndbout << "Found " << cnt << " records..." << endl;
1749 ndbout << "Updating..." << endl;
1750 CHECK(hugoTrans.scanUpdateRecords(pNdb,
1751 NdbScanOperation::SF_TupScan, cnt) == 0
1752 || hugoTrans.getRetryMaxReached());
1753 ndbout << "Clearing..." << endl;
1754 CHECK(hugoTrans.clearTable(pNdb,
1755 NdbScanOperation::SF_TupScan, cnt) == 0);
1756 i++;
1757 }
1758
1759 if (error)
1760 {
1761 restarter.insertErrorInAllNodes(0);
1762 }
1763
1764 ndbout << "runSR_DD_2 finished" << endl;
1765 ctx->stopTest();
1766 return result;
1767 }
1768
runSR_DD_3(NDBT_Context * ctx,NDBT_Step * step)1769 int runSR_DD_3(NDBT_Context* ctx, NDBT_Step* step)
1770 {
1771 Ndb* pNdb = GETNDB(step);
1772 int result = NDBT_OK;
1773 Uint32 loops = ctx->getNumLoops();
1774 Uint32 rows = ctx->getNumRecords();
1775 NdbRestarter restarter;
1776 NdbBackup backup;
1777 bool lcploop = ctx->getProperty("LCP", (unsigned)0);
1778 bool all = ctx->getProperty("ALL", (unsigned)0);
1779 int error = (int)ctx->getProperty("ERROR", (unsigned)0);
1780 rows = ctx->getProperty("ROWS", rows);
1781
1782 Uint32 i = 1;
1783
1784 int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
1785 int lcp = DumpStateOrd::DihMinTimeBetweenLCP;
1786
1787 if (error)
1788 {
1789 restarter.insertErrorInAllNodes(error);
1790 }
1791
1792 HugoTransactions hugoTrans(*ctx->getTab());
1793 while(i<=loops && result != NDBT_FAILED)
1794 {
1795 if (i > 0 && ctx->closeToTimeout(30))
1796 break;
1797
1798 if (lcploop)
1799 {
1800 CHECK(restarter.dumpStateAllNodes(&lcp, 1) == 0);
1801 }
1802
1803 int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
1804
1805 if (hugoTrans.loadTable(pNdb, rows) != 0)
1806 {
1807 return NDBT_FAILED;
1808 }
1809
1810 if (!all)
1811 {
1812 ndbout << "Making " << nodeId << " crash" << endl;
1813 int kill[] = { 9999, 3000, 10000 };
1814 CHECK(restarter.dumpStateOneNode(nodeId, val, 2) == 0);
1815 CHECK(restarter.dumpStateOneNode(nodeId, kill, 3) == 0);
1816 }
1817 else
1818 {
1819 ndbout << "Crashing cluster" << endl;
1820 ctx->setProperty("StopAbort", 3000 + rand() % (10000 - 3000));
1821 }
1822
1823 int deletedrows[100];
1824 Uint64 end = NdbTick_CurrentMillisecond() + 13000;
1825 do {
1826 Uint32 cnt = 0;
1827 for (; cnt<NDB_ARRAY_SIZE(deletedrows); cnt++)
1828 {
1829 deletedrows[cnt] = rand() % rows;
1830 if (hugoTrans.startTransaction(pNdb))
1831 break;
1832 if (hugoTrans.pkDeleteRecord(pNdb, deletedrows[cnt]))
1833 break;
1834 if (hugoTrans.execute_Commit(pNdb))
1835 break;
1836 hugoTrans.closeTransaction(pNdb);
1837 }
1838 if (hugoTrans.getTransaction() != 0)
1839 hugoTrans.closeTransaction(pNdb);
1840
1841 if (hugoTrans.scanUpdateRecords(pNdb, NdbScanOperation::SF_TupScan,0)!=0)
1842 break;
1843
1844 for (Uint32 n = 0; n<cnt; n++)
1845 {
1846 if (hugoTrans.startTransaction(pNdb))
1847 break;
1848 if (hugoTrans.pkInsertRecord(pNdb, deletedrows[n], 1, rand()))
1849 break;
1850 if (hugoTrans.execute_Commit(pNdb))
1851 break;
1852 hugoTrans.closeTransaction(pNdb);
1853 }
1854 if (hugoTrans.getTransaction() != 0)
1855 hugoTrans.closeTransaction(pNdb);
1856
1857 if (hugoTrans.scanUpdateRecords(pNdb, NdbScanOperation::SF_TupScan,0)!=0
1858 && !hugoTrans.getRetryMaxReached())
1859 break;
1860 } while (NdbTick_CurrentMillisecond() < end);
1861
1862 if (!all)
1863 {
1864 ndbout << "Waiting for " << nodeId << " to restart" << endl;
1865 CHECK(restarter.waitNodesNoStart(&nodeId, 1) == 0);
1866 ndbout << "Restarting cluster" << endl;
1867 CHECK(restarter.restartAll(false, true, true) == 0);
1868 }
1869 else
1870 {
1871 ndbout << "Waiting for cluster to restart" << endl;
1872 }
1873
1874 CHECK(restarter.waitClusterNoStart() == 0);
1875 CHECK(restarter.startAll() == 0);
1876 CHECK(restarter.waitClusterStarted() == 0);
1877 if (error)
1878 {
1879 restarter.insertErrorInAllNodes(error);
1880 }
1881
1882 ndbout << "Starting backup..." << flush;
1883 CHECK(backup.start() == 0);
1884 ndbout << "done" << endl;
1885
1886 int cnt = 0;
1887 CHECK(hugoTrans.selectCount(pNdb, 0, &cnt) == 0);
1888 ndbout << "Found " << cnt << " records..." << endl;
1889 ndbout << "Updating..." << endl;
1890 CHECK(hugoTrans.scanUpdateRecords(pNdb,
1891 NdbScanOperation::SF_TupScan, cnt) == 0);
1892 ndbout << "Clearing..." << endl;
1893 CHECK(hugoTrans.clearTable(pNdb,
1894 NdbScanOperation::SF_TupScan, cnt) == 0);
1895 i++;
1896 }
1897
1898 if (error)
1899 {
1900 restarter.insertErrorInAllNodes(0);
1901 }
1902
1903 ndbout << "runSR_DD_3 finished" << endl;
1904 ctx->stopTest();
1905 return result;
1906 }
1907
runBug22696(NDBT_Context * ctx,NDBT_Step * step)1908 int runBug22696(NDBT_Context* ctx, NDBT_Step* step)
1909 {
1910 Ndb* pNdb = GETNDB(step);
1911 int result = NDBT_OK;
1912 Uint32 loops = ctx->getNumLoops();
1913 Uint32 rows = ctx->getNumRecords();
1914 NdbRestarter restarter;
1915 HugoTransactions hugoTrans(*ctx->getTab());
1916
1917 Uint32 i = 0;
1918 while(i<=loops && result != NDBT_FAILED)
1919 {
1920 ndbout_c("loop %u", i);
1921 for (Uint32 j = 0; j<10 && result != NDBT_FAILED; j++)
1922 CHECK(hugoTrans.scanUpdateRecords(pNdb, rows) == 0);
1923
1924 CHECK(restarter.restartAll(false, true, i > 0 ? true : false) == 0);
1925 CHECK(restarter.waitClusterNoStart() == 0);
1926 CHECK(restarter.insertErrorInAllNodes(7072) == 0);
1927 CHECK(restarter.startAll() == 0);
1928 CHECK(restarter.waitClusterStarted() == 0);
1929 CHECK(pNdb->waitUntilReady() == 0);
1930
1931 i++;
1932 if (i < loops)
1933 {
1934 NdbSleep_SecSleep(5); // Wait for a few gcp
1935 }
1936 }
1937
1938 ctx->stopTest();
1939 return result;
1940 }
1941
1942 int
runCreateAllTables(NDBT_Context * ctx,NDBT_Step * step)1943 runCreateAllTables(NDBT_Context* ctx, NDBT_Step* step)
1944 {
1945 if (NDBT_Tables::createAllTables(GETNDB(step), false, true))
1946 return NDBT_FAILED;
1947 return NDBT_OK;
1948 }
1949
1950 int
runBasic(NDBT_Context * ctx,NDBT_Step * step)1951 runBasic(NDBT_Context* ctx, NDBT_Step* step)
1952 {
1953 Ndb* pNdb = GETNDB(step);
1954 NdbDictionary::Dictionary * pDict = pNdb->getDictionary();
1955 int loops = ctx->getNumLoops();
1956 int records = ctx->getNumRecords();
1957 NdbRestarter restarter;
1958 int result = NDBT_OK;
1959
1960 for (int l = 0; l<loops; l++)
1961 {
1962 for (int i = 0; i<NDBT_Tables::getNumTables(); i++)
1963 {
1964 const NdbDictionary::Table* tab =
1965 pDict->getTable(NDBT_Tables::getTable(i)->getName());
1966 HugoTransactions trans(* tab);
1967 switch(l % 3){
1968 case 0:
1969 trans.loadTable(pNdb, records);
1970 trans.scanUpdateRecords(pNdb, records);
1971 break;
1972 case 1:
1973 trans.scanUpdateRecords(pNdb, records);
1974 trans.clearTable(pNdb, records/2);
1975 trans.loadTable(pNdb, records/2);
1976 break;
1977 case 2:
1978 trans.clearTable(pNdb, records/2);
1979 trans.loadTable(pNdb, records/2);
1980 trans.clearTable(pNdb, records/2);
1981 break;
1982 }
1983 }
1984
1985 ndbout << "Restarting cluster..." << endl;
1986 CHECK(restarter.restartAll(false, true, false) == 0);
1987 CHECK(restarter.waitClusterNoStart() == 0);
1988 CHECK(restarter.startAll() == 0);
1989 CHECK(restarter.waitClusterStarted() == 0);
1990 CHECK(pNdb->waitUntilReady() == 0);
1991
1992 for (int i = 0; i<NDBT_Tables::getNumTables(); i++)
1993 {
1994 const NdbDictionary::Table* tab =
1995 pDict->getTable(NDBT_Tables::getTable(i)->getName());
1996 HugoTransactions trans(* tab);
1997 trans.scanUpdateRecords(pNdb, records);
1998 }
1999 }
2000
2001 return result;
2002 }
2003
2004 int
runDropAllTables(NDBT_Context * ctx,NDBT_Step * step)2005 runDropAllTables(NDBT_Context* ctx, NDBT_Step* step)
2006 {
2007 NDBT_Tables::dropAllTables(GETNDB(step));
2008 return NDBT_OK;
2009 }
2010
/**
 * "TO" test step: node restarts per node group, local checkpoints and a
 * system restart, repeated for getNumLoops() + 1 rounds.
 *
 * Returns NDBT_OK immediately when fewer than two data nodes are reported.
 * Otherwise each round
 *   1. sends the DihMinTimeBetweenLCP dump code to all nodes,
 *   2. subscribes to checkpoint log events,
 *   3. restarts one randomly chosen node from every node group into the
 *      no-start state, running up to 25 scan updates after each,
 *   4. waits for local checkpoint completion events,
 *   5. restarts the whole cluster and runs scan updates for 30 seconds
 *      while it comes up (with g_err output suppressed),
 *   6. clears and reloads the table and waits for one more checkpoint.
 * From the second round on, the loop is left early when
 * ctx->closeToTimeout(35) is true.
 *
 * NOTE(review): CHECK() is a macro defined outside this block; its exact
 * control flow determines what happens after a failure inside the nested
 * loops -- confirm before restructuring.
 * NOTE(review): `handle` is only destroyed at the end of a fully completed
 * round; if CHECK() leaves the round early the handle appears to be leaked.
 * NOTE(review): the early "fewer than two nodes" return does not call
 * ctx->stopTest(), unlike the normal exit path -- confirm this is intended.
 *
 * @param ctx   test context supplying loop count, record count and table
 * @param step  test step, used to obtain the Ndb object
 * @return      NDBT_OK, or the failure recorded in `result`
 */
int
runTO(NDBT_Context* ctx, NDBT_Step* step)
{
  Ndb* pNdb = GETNDB(step);
  int result = NDBT_OK;
  Uint32 loops = ctx->getNumLoops();
  Uint32 rows = ctx->getNumRecords();
  NdbRestarter res;
  HugoTransactions hugoTrans(*ctx->getTab());

  if (res.getNumDbNodes() < 2)
    return NDBT_OK;

  // nodeGroups is indexed by node id; nodeGroupMap has one bit per node
  // group that contains at least one data node.
  Uint32 nodeGroups[256];
  Bitmask<256/32> nodeGroupMap;
  for (int j = 0; j<res.getNumDbNodes(); j++)
  {
    int node = res.getDbNodeId(j);
    nodeGroups[node] = res.getNodeGroup(node);
    nodeGroupMap.set(nodeGroups[node]);
  }

  struct ndb_logevent event;
  int val[] = { DumpStateOrd::DihMinTimeBetweenLCP, 0 };

  Uint32 i = 0;
  while(i<=loops && result != NDBT_FAILED)
  {
    if (i > 0 && ctx->closeToTimeout(35))
      break;

    // Only the dump code itself is sent here (one argument).
    CHECK(res.dumpStateAllNodes(val, 1) == 0);

    int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
    NdbLogEventHandle handle =
      ndb_mgm_create_logevent_handle(res.handle, filter);

    // Restart one node per node group; a group's bit is cleared once one of
    // its nodes has been picked.
    Bitmask<256/32> notstopped = nodeGroupMap;
    while(!notstopped.isclear())
    {
      int node;
      do {
        // Draw random nodes until one belongs to a group not yet handled.
        node = res.getDbNodeId(rand() % res.getNumDbNodes());
      } while (!notstopped.get(nodeGroups[node]));

      notstopped.clear(nodeGroups[node]);
      ndbout_c("stopping %u", node);
      CHECK(res.restartOneDbNode(node, false, true, true) == 0);
      CHECK(res.waitNodesNoStart(&node, 1) == 0);
      // Up to 25 scan updates; stop at the first one that fails.
      for (Uint32 j = 0; j<25; j++)
      {
        if (! (hugoTrans.scanUpdateRecords(pNdb, 0) == 0))
          break;
      }
      // Consume log events until a LocalCheckpointCompleted event arrives or
      // ndb_logevent_get_next() returns a negative value.
      while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
            event.type != NDB_LE_LocalCheckpointCompleted);
    }

    while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
          event.type != NDB_LE_LocalCheckpointCompleted);

    // Remember the id of the checkpoint that just completed.
    Uint32 LCP = event.LocalCheckpointCompleted.lci;
    ndbout_c("LCP: %u", LCP);

    // Keep generating load until a completed checkpoint with id >= LCP + 3
    // is seen, or no LocalCheckpointCompleted event could be fetched.
    do
    {
      bzero(&event, sizeof(event));
      while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
            event.type != NDB_LE_LocalCheckpointCompleted)
        bzero(&event, sizeof(event));

      if (event.type == NDB_LE_LocalCheckpointCompleted &&
          event.LocalCheckpointCompleted.lci < LCP + 3)
      {
        hugoTrans.scanUpdateRecords(pNdb, 0);
      }
      else
      {
        break;
      }
    } while (true);

    ndbout_c("LCP: %u", event.LocalCheckpointCompleted.lci);

    CHECK(res.restartAll(false, true, true) == 0);
    CHECK(res.waitClusterNoStart() == 0);
    CHECK(res.startAll() == 0);
    Uint64 now = NdbTick_CurrentMillisecond();
    /**
     * Running transactions while the cluster is down...
     *   causes *lots* of printouts...redirect to a null stream
     *   so that log files don't get megabytes
     */
    NullOutputStream null;
    OutputStream * save[1];
    save[0] = g_err.m_out;
    g_err.m_out = &null;
    // Run scan updates for 30 seconds (30000 ms) from `now`.
    do
    {
      hugoTrans.scanUpdateRecords(pNdb, 0);
    } while (NdbTick_CurrentMillisecond() < (now + 30000));
    g_err.m_out = save[0];
    CHECK(res.waitClusterStarted() == 0);
    CHECK(pNdb->waitUntilReady() == 0);

    // Results of clear/load are not checked.
    hugoTrans.clearTable(pNdb);
    hugoTrans.loadTable(pNdb, rows);

    CHECK(res.dumpStateAllNodes(val, 1) == 0);

    while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
          event.type != NDB_LE_LocalCheckpointCompleted);

    ndb_mgm_destroy_logevent_handle(&handle);

    i++;
  }

  // Both array elements are sent this time (dump code plus the value 0).
  res.dumpStateAllNodes(val, 2); // Reset LCP time

  ctx->stopTest();
  return result;
}
2134
runBug45154(NDBT_Context * ctx,NDBT_Step * step)2135 int runBug45154(NDBT_Context* ctx, NDBT_Step* step)
2136 {
2137 Ndb* pNdb = GETNDB(step);
2138 NdbDictionary::Dictionary * pDict = pNdb->getDictionary();
2139 int result = NDBT_OK;
2140 Uint32 loops = ctx->getNumLoops();
2141 Uint32 rows = ctx->getNumRecords();
2142 NdbRestarter restarter;
2143
2144 restarter.getNumDbNodes();
2145 int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
2146 NdbLogEventHandle handle =
2147 ndb_mgm_create_logevent_handle(restarter.handle, filter);
2148
2149 struct ndb_logevent event;
2150
2151 Uint32 frag_data[128];
2152 bzero(frag_data, sizeof(frag_data));
2153
2154 NdbDictionary::HashMap map;
2155 pDict->getDefaultHashMap(map, 2*restarter.getNumDbNodes());
2156 pDict->createHashMap(map);
2157
2158 pDict->getDefaultHashMap(map, restarter.getNumDbNodes());
2159 pDict->createHashMap(map);
2160
2161 for(Uint32 i = 0; i < loops && result != NDBT_FAILED; i++)
2162 {
2163 ndbout_c("loop %u", i);
2164
2165 NdbDictionary::Table copy = *ctx->getTab();
2166 copy.setName("BUG_45154");
2167 copy.setFragmentType(NdbDictionary::Object::DistrKeyLin);
2168 copy.setFragmentCount(2 * restarter.getNumDbNodes());
2169 copy.setFragmentData(frag_data, 2*restarter.getNumDbNodes());
2170 pDict->dropTable("BUG_45154");
2171 int res = pDict->createTable(copy);
2172 if (res != 0)
2173 {
2174 ndbout << pDict->getNdbError() << endl;
2175 return NDBT_FAILED;
2176 }
2177 const NdbDictionary::Table* copyptr= pDict->getTable("BUG_45154");
2178
2179 {
2180 HugoTransactions hugoTrans(*copyptr);
2181 hugoTrans.loadTable(pNdb, rows);
2182 }
2183
2184 int dump[] = { DumpStateOrd::DihStartLcpImmediately };
2185 for (int l = 0; l<2; l++)
2186 {
2187 CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
2188 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
2189 event.type != NDB_LE_LocalCheckpointStarted);
2190 while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
2191 event.type != NDB_LE_LocalCheckpointCompleted);
2192 }
2193
2194 pDict->dropTable("BUG_45154");
2195 copy.setFragmentCount(restarter.getNumDbNodes());
2196 copy.setFragmentData(frag_data, restarter.getNumDbNodes());
2197 res = pDict->createTable(copy);
2198 if (res != 0)
2199 {
2200 ndbout << pDict->getNdbError() << endl;
2201 return NDBT_FAILED;
2202 }
2203 copyptr = pDict->getTable("BUG_45154");
2204
2205 {
2206 HugoTransactions hugoTrans(*copyptr);
2207 hugoTrans.loadTable(pNdb, rows);
2208 for (Uint32 pp = 0; pp<3; pp++)
2209 hugoTrans.scanUpdateRecords(pNdb, rows);
2210 }
2211 restarter.restartAll(false, true, true);
2212 restarter.waitClusterNoStart();
2213 restarter.startAll();
2214 restarter.waitClusterStarted();
2215
2216 pDict->dropTable("BUG_45154");
2217 }
2218
2219 ctx->stopTest();
2220 return result;
2221 }
2222
runBug46651(NDBT_Context * ctx,NDBT_Step * step)2223 int runBug46651(NDBT_Context* ctx, NDBT_Step* step)
2224 {
2225 Ndb* pNdb = GETNDB(step);
2226 NdbDictionary::Dictionary * pDict = pNdb->getDictionary();
2227 Uint32 rows = ctx->getNumRecords();
2228 NdbRestarter res;
2229
2230 NdbDictionary::Table tab;
2231 tab.setName("BUG_46651");
2232
2233 NdbDictionary::Column col;
2234 col.setName("ATTR1");
2235 col.setType(NdbDictionary::Column::Unsigned);
2236 col.setLength(1);
2237 col.setPrimaryKey(true);
2238 col.setNullable(false);
2239 col.setAutoIncrement(false);
2240 tab.addColumn(col);
2241 col.setName("ATTR2");
2242 col.setType(NdbDictionary::Column::Unsigned);
2243 col.setLength(1);
2244 col.setPrimaryKey(false);
2245 col.setNullable(false);
2246 tab.addColumn(col);
2247 col.setName("ATTR3");
2248 col.setType(NdbDictionary::Column::Unsigned);
2249 col.setLength(1);
2250 col.setPrimaryKey(false);
2251 col.setNullable(false);
2252 tab.addColumn(col);
2253 tab.setForceVarPart(true);
2254 pDict->dropTable(tab.getName());
2255 if (pDict->createTable(tab))
2256 {
2257 ndbout << pDict->getNdbError() << endl;
2258 return NDBT_FAILED;
2259 }
2260
2261 const NdbDictionary::Table* pTab = pDict->getTable(tab.getName());
2262 if (pTab == 0)
2263 {
2264 ndbout << pDict->getNdbError() << endl;
2265 return NDBT_FAILED;
2266 }
2267
2268 {
2269 HugoTransactions trans(* pTab);
2270 if (trans.loadTable(pNdb, rows) != 0)
2271 {
2272 return NDBT_FAILED;
2273 }
2274 }
2275
2276 res.restartAll2(NdbRestarter::NRRF_NOSTART);
2277 if (res.waitClusterNoStart())
2278 return NDBT_FAILED;
2279 res.startAll();
2280 if (res.waitClusterStarted())
2281 return NDBT_FAILED;
2282
2283 pNdb->waitUntilReady();
2284
2285 NdbDictionary::Table newTab = *pTab;
2286 col.setName("ATTR4");
2287 col.setType(NdbDictionary::Column::Varbinary);
2288 col.setLength(25);
2289 col.setPrimaryKey(false);
2290 col.setNullable(true);
2291 col.setDynamic(true);
2292 newTab.addColumn(col);
2293
2294 if (pDict->alterTable(*pTab, newTab))
2295 {
2296 ndbout << pDict->getNdbError() << endl;
2297 return NDBT_FAILED;
2298 }
2299
2300 res.restartAll2(NdbRestarter::NRRF_NOSTART | NdbRestarter::NRRF_ABORT);
2301 if (res.waitClusterNoStart())
2302 return NDBT_FAILED;
2303 res.startAll();
2304 if (res.waitClusterStarted())
2305 return NDBT_FAILED;
2306
2307 pNdb->waitUntilReady();
2308 pDict->dropTable(tab.getName());
2309
2310 return NDBT_OK;
2311 }
2312
/**
 * Test step for bug 46412.
 *
 * Returns NDBT_OK immediately when fewer than two data nodes are reported.
 * Otherwise each loop iteration
 *   1. lists the nodes in master succession order (current master, then
 *      repeated getNextMasterNodeId()),
 *   2. walks the first nodeCount / 2 entries of that list and records every
 *      node whose node group was already seen ("COLLISION"), extending the
 *      walk by one entry per collision (capped at nodeCount),
 *   3. if there were collisions, restarts the colliding nodes and starts the
 *      same iteration over via `goto loop` (the loop counter is not
 *      advanced),
 *   4. otherwise inserts error 7218 in the first nodeCount / 2 nodes of the
 *      list and error 7220 in all others, sends dump code 7099 to all
 *      nodes, waits for the cluster to reach no-start and starts it again.
 *
 * Apart from waitClusterStarted(), the results of the restarter calls are
 * not checked.
 *
 * NOTE(review): the meaning of error inserts 7218/7220 and of dump code 7099
 * is defined in the data node code, not visible here.
 *
 * @param ctx   test context supplying the loop count
 * @param step  unused
 * @return      NDBT_OK, or NDBT_FAILED when waitClusterStarted() fails
 */
int
runBug46412(NDBT_Context* ctx, NDBT_Step* step)
{
  Uint32 loops = ctx->getNumLoops();
  NdbRestarter res;
  const Uint32 nodeCount = res.getNumDbNodes();
  if(nodeCount < 2)
  {
    return NDBT_OK;
  }

  for (Uint32 l = 0; l<loops; l++)
  {
    // Re-entry point after colliding nodes have been restarted.
loop:
    printf("checking nodegroups of getNextMasterNodeId(): ");
    int nodes[256];
    bzero(nodes, sizeof(nodes));
    nodes[0] = res.getMasterNodeId();
    printf("%d ", nodes[0]);
    for (Uint32 i = 1; i<nodeCount; i++)
    {
      nodes[i] = res.getNextMasterNodeId(nodes[i-1]);
      printf("%d ", nodes[i]);
    }
    printf("\n");

    Bitmask<256/32> ng;          // node groups seen so far
    int cnt = 0;                 // number of entries in restartnodes
    int restartnodes[256];

    Uint32 limit = (nodeCount / 2);
    for (Uint32 i = 0; i<limit; i++)
    {
      int tmp = res.getNodeGroup(nodes[i]);
      printf("node %d ng: %d", nodes[i], tmp);
      if (ng.get(tmp))
      {
        // Node group already represented: schedule this node for restart
        // and look one entry further down the list.
        restartnodes[cnt++] = nodes[i];
        ndbout_c(" COLLISION");
        limit++;
        if (limit > nodeCount)
          limit = nodeCount;
      }
      else
      {
        ng.set(tmp);
        ndbout_c(" OK");
      }
    }

    if (cnt)
    {
      printf("restarting nodes: ");
      for (int i = 0; i<cnt; i++)
        printf("%d ", restartnodes[i]);
      printf("\n");
      for (int i = 0; i<cnt; i++)
      {
        res.restartOneDbNode(restartnodes[i], false, true, true);
      }
      res.waitNodesNoStart(restartnodes, cnt);
      res.startNodes(restartnodes, cnt);
      if (res.waitClusterStarted())
        return NDBT_FAILED;

      // Recompute the succession order for the same loop iteration.
      goto loop;
    }

    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
    res.dumpStateAllNodes(val2, 2);

    // First half of the succession list, handled in reverse order: 7218.
    Bitmask<256/32> mask;
    for (Uint32 i = 0; i<(nodeCount / 2); i++)
    {
      int node = nodes[(nodeCount / 2) - (i + 1)];
      mask.set(node);
      res.insertErrorInNode(node, 7218);
    }

    // Every node not handled above: 7220.
    for (Uint32 i = 0; i<nodeCount; i++)
    {
      int node = nodes[i];
      if (mask.get(node))
        continue;
      res.insertErrorInNode(node, 7220);
    }

    // Dump code 7099 (named `lcp` here; presumably triggers a local
    // checkpoint -- confirm).
    int lcp = 7099;
    res.dumpStateAllNodes(&lcp, 1);

    res.waitClusterNoStart();
    res.startAll();
    if (res.waitClusterStarted())
      return NDBT_FAILED;
  }

  return NDBT_OK;
}
2411
2412 int
runScanUpdateUntilStopped(NDBT_Context * ctx,NDBT_Step * step)2413 runScanUpdateUntilStopped(NDBT_Context* ctx, NDBT_Step* step)
2414 {
2415 Ndb* pNdb = GETNDB(step);
2416 HugoTransactions hugoTrans(*ctx->getTab());
2417
2418 NullOutputStream null;
2419 OutputStream * save[1];
2420 save[0] = g_err.m_out;
2421 g_err.m_out = &null;
2422 while (!ctx->isTestStopped())
2423 {
2424 hugoTrans.scanUpdateRecords(pNdb, 0);
2425 }
2426 g_err.m_out = save[0];
2427 return NDBT_OK;
2428 }
2429
2430 int
runBug48436(NDBT_Context * ctx,NDBT_Step * step)2431 runBug48436(NDBT_Context* ctx, NDBT_Step* step)
2432 {
2433 NdbRestarter res;
2434 Uint32 loops = ctx->getNumLoops();
2435 const Uint32 nodeCount = res.getNumDbNodes();
2436 if(nodeCount < 2)
2437 {
2438 return NDBT_OK;
2439 }
2440
2441 for (Uint32 l = 0; l<loops; l++)
2442 {
2443 int nodes[2];
2444 nodes[0] = res.getNode(NdbRestarter::NS_RANDOM);
2445 nodes[1] = res.getRandomNodeSameNodeGroup(nodes[0], rand());
2446 int val = 7099;
2447 int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2448
2449 ndbout_c("nodes %u %u", nodes[0], nodes[1]);
2450
2451 for (Uint32 j = 0; j<5; j++)
2452 {
2453 int c = (rand()) % 11;
2454 ndbout_c("case: %u", c);
2455 switch(c){
2456 case 0:
2457 case 1:
2458 res.dumpStateAllNodes(&val, 1);
2459 case 2:
2460 case 3:
2461 case 4:
2462 case 5:
2463 res.restartOneDbNode(nodes[0], false, true, true);
2464 res.waitNodesNoStart(nodes+0,1);
2465 res.dumpStateOneNode(nodes[0], val2, 2);
2466 res.insertErrorInNode(nodes[0], 5054); // crash during restart
2467 res.startAll();
2468 sleep(3);
2469 res.waitNodesNoStart(nodes+0,1);
2470 res.startAll();
2471 break;
2472 case 6:
2473 res.restartOneDbNode(nodes[0], false, true, true);
2474 res.waitNodesNoStart(nodes+0, 1);
2475 res.startAll();
2476 break;
2477 case 7:
2478 res.dumpStateAllNodes(&val, 1);
2479 case 8:
2480 res.restartOneDbNode(nodes[1], false, true, true);
2481 res.waitNodesNoStart(nodes+1,1);
2482 res.dumpStateOneNode(nodes[1], val2, 2);
2483 res.insertErrorInNode(nodes[1], 5054); // crash during restart
2484 res.startAll();
2485 sleep(3);
2486 res.waitNodesNoStart(nodes+1,1);
2487 res.startAll();
2488 break;
2489 case 9:
2490 res.restartAll(false, true, true);
2491 res.waitClusterNoStart();
2492 res.startAll();
2493 break;
2494 case 10:
2495 {
2496 res.dumpStateAllNodes(val2, 2);
2497 int node = res.getMasterNodeId();
2498 res.insertErrorInNode(node, 7222);
2499 res.waitClusterNoStart();
2500 res.startAll();
2501 break;
2502 }
2503 }
2504 res.waitClusterStarted();
2505 }
2506 res.restartAll(false, true, true);
2507 res.waitClusterNoStart();
2508 res.startAll();
2509 res.waitClusterStarted();
2510 }
2511 ctx->stopTest();
2512
2513 return NDBT_OK;
2514 }
2515
2516 int
runBug54611(NDBT_Context * ctx,NDBT_Step * step)2517 runBug54611(NDBT_Context* ctx, NDBT_Step* step)
2518 {
2519 NdbRestarter res;
2520 Uint32 loops = ctx->getNumLoops();
2521 Ndb* pNdb = GETNDB(step);
2522 int rows = ctx->getNumRecords();
2523
2524 HugoTransactions hugoTrans(*ctx->getTab());
2525
2526 for (Uint32 l = 0; l<loops; l++)
2527 {
2528 int val = DumpStateOrd::DihMinTimeBetweenLCP;
2529 res.dumpStateAllNodes(&val, 1);
2530
2531 for (Uint32 i = 0; i < 5; i++)
2532 {
2533 hugoTrans.scanUpdateRecords(pNdb, rows);
2534 }
2535
2536 int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2537 res.dumpStateAllNodes(val2, 2);
2538
2539 int node = res.getMasterNodeId();
2540 res.insertErrorInNode(node, 7222);
2541
2542 while (hugoTrans.scanUpdateRecords(pNdb, rows) == 0);
2543 res.waitClusterNoStart();
2544
2545 res.insertErrorInAllNodes(5055);
2546 res.startAll();
2547 res.waitClusterStarted();
2548 pNdb->waitUntilReady();
2549 }
2550
2551 return NDBT_OK;
2552 }
2553
2554 int
runBug56961(NDBT_Context * ctx,NDBT_Step * step)2555 runBug56961(NDBT_Context* ctx, NDBT_Step* step)
2556 {
2557 NdbRestarter res;
2558 Uint32 loops = ctx->getNumLoops();
2559 Ndb* pNdb = GETNDB(step);
2560 int rows = ctx->getNumRecords();
2561
2562 int node = res.getNode(NdbRestarter::NS_RANDOM);
2563 int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2564 HugoTransactions hugoTrans(*ctx->getTab());
2565
2566 for (Uint32 l = 0; l<loops; l++)
2567 {
2568 ndbout_c("Waiting for %d to restart (5058)", node);
2569 res.dumpStateOneNode(node, val2, 2);
2570 res.insertErrorInNode(node, 5058);
2571
2572 hugoTrans.clearTable(pNdb);
2573 hugoTrans.loadTable(pNdb, rows);
2574 while (hugoTrans.scanUpdateRecords(pNdb, rows) == NDBT_OK &&
2575 res.getNodeStatus(node) != NDB_MGM_NODE_STATUS_NOT_STARTED &&
2576 res.getNodeStatus(node) != NDB_MGM_NODE_STATUS_NO_CONTACT);
2577 res.waitNodesNoStart(&node, 1);
2578 res.startNodes(&node, 1);
2579 ndbout_c("Waiting for %d to start", node);
2580 res.waitClusterStarted();
2581
2582 ndbout_c("Waiting for %d to restart (5059)", node);
2583 res.dumpStateOneNode(node, val2, 2);
2584 res.insertErrorInNode(node, 5059);
2585
2586 hugoTrans.clearTable(pNdb);
2587 hugoTrans.loadTable(pNdb, rows);
2588 while (hugoTrans.scanUpdateRecords(pNdb, rows) == NDBT_OK &&
2589 res.getNodeStatus(node) != NDB_MGM_NODE_STATUS_NOT_STARTED &&
2590 res.getNodeStatus(node) != NDB_MGM_NODE_STATUS_NO_CONTACT);
2591 res.waitNodesNoStart(&node, 1);
2592 res.startNodes(&node, 1);
2593 ndbout_c("Waiting for %d to start", node);
2594 res.waitClusterStarted();
2595 pNdb->waitUntilReady();
2596 }
2597
2598 return NDBT_OK;
2599 }
2600
runAddNodes(NDBT_Context * ctx,NDBT_Step * step)2601 int runAddNodes(NDBT_Context* ctx, NDBT_Step* step)
2602 {
2603 /*
2604 To add new nodes online, the two nodes should be already up in the cluster,
2605 with nodegroup 65536. Then they can be added to the cluster online using the
2606 ndb_mgm command create nodegroup. Here,
2607 1. we retrieve the list of such nodes with ng 65536(internally -256) and
2608 2. add them to the cluster by passing them to the mgmapi function
2609 ndb_mgm_create_nodegroup().
2610 */
2611 NdbRestarter restarter;
2612
2613 Vector<int> newNodes;
2614 int ng;
2615
2616 /* Retrieve the list of nodes with nodegroup 65536(-256) */
2617 for(int i= 0; i < restarter.getNumDbNodes(); i++ )
2618 {
2619 int _node_id= restarter.getDbNodeId(i);
2620 if(restarter.getNodeGroup(_node_id) == -256)
2621 {
2622 /* nodes that don't have a nodegroup yet */
2623 newNodes.push_back(_node_id);
2624 }
2625 }
2626
2627 /* if there are no new nodes, can't test add node restart */
2628 if(newNodes.size() == 0)
2629 {
2630 g_err << "ERR: "<< step->getName()
2631 << " failed on line " << __LINE__ << endl;
2632 g_err << "Incorrect cluster configuration."
2633 << "Requires additional nodes with nodegroup 65536." << endl;
2634 return NDBT_FAILED;
2635 }
2636
2637 /* end of array value for newNodes */
2638 newNodes.push_back(0);
2639
2640 /* include the new nodes into cluster using ndb_mgm_create_nodegroup() */
2641 if(ndb_mgm_create_nodegroup(restarter.handle, newNodes.getBase(),
2642 &ng, NULL) != 0)
2643 {
2644 g_err << "ERR: "<< step->getName()
2645 << " failed on line " << __LINE__ << endl;
2646 g_err << ndb_mgm_get_latest_error_desc(restarter.handle) << endl;
2647 return NDBT_FAILED;
2648 }
2649 g_info << "New nodes added to nodegroup " << ng << endl;
2650
2651 return NDBT_OK;
2652 }
2653
runAlterTableAndOptimize(NDBT_Context * ctx,NDBT_Step * step)2654 int runAlterTableAndOptimize(NDBT_Context* ctx, NDBT_Step* step)
2655 {
2656 NdbRestarter restarter;
2657 /* check if there is a possibility of node killing during redistribution */
2658 bool nodesKilledDuringStep= ctx->getProperty("NodesKilledDuringStep");
2659
2660 /* Redistribute existing cluster data */
2661 DbUtil sql("TEST_DB");
2662 {
2663 BaseString query;
2664 int numOfTables = ctx->getNumTables();
2665
2666 /* ALTER ONLINE TABLE <tbl_name> REORGANIZE PARTITION */
2667 for(int i= 0; i < numOfTables; i++ )
2668 {
2669 SqlResultSet resultSet;
2670 query.assfmt("ALTER ONLINE TABLE %s REORGANIZE PARTITION",
2671 ctx->getTableName(i));
2672 g_info << "Executing query : "<< query.c_str() << endl;
2673
2674 if(!sql.doQuery(query.c_str(), resultSet)){
2675 if(nodesKilledDuringStep &&
2676 sql.getErrorNumber() == 0)
2677 {
2678 /* query failed probably because of a node kill in another step.
2679 wait for the nodes to get into start phase before retrying */
2680 if(restarter.waitClusterStarted() != 0){
2681 g_err << "Cluster went down during reorganize partition" << endl;
2682 return NDBT_FAILED;
2683 }
2684 /* retry the query for same table */
2685 i--;
2686 nodesKilledDuringStep= false;
2687 continue;
2688 } else {
2689 /* either the query failed due to returning error code from server
2690 or cluster crash */
2691 g_err << "QUERY : "<< query.c_str() << "; failed" << endl;
2692 return NDBT_FAILED;
2693 }
2694 }
2695 }
2696
2697 if(nodesKilledDuringStep){
2698 /* Nodes were supposed to be killed during alter table,
2699 but they never were. Test lost its purpose. Mark it as failed
2700 Mostly won't happen. Just insuring. */
2701 g_err << "Nodes were never killed during alter table." << endl;
2702 return NDBT_FAILED;
2703 }
2704
2705 /* Reclaim freed space by running optimize table */
2706 for(int i= 0; i < numOfTables; i++ )
2707 {
2708 SqlResultSet result;
2709 BaseString query;
2710 query.assfmt("OPTIMIZE TABLE %s", ctx->getTableName(i));
2711 g_info << "Executing query : "<< query.c_str() << endl;
2712 if (!sql.doQuery(query.c_str(), result)){
2713 g_err << "Failed executing optimize table" << endl;
2714 return NDBT_FAILED;
2715 }
2716 }
2717 }
2718 return NDBT_OK;
2719 }
2720
runKillTwoNodes(NDBT_Context * ctx,NDBT_Step * step)2721 int runKillTwoNodes(NDBT_Context* ctx, NDBT_Step* step)
2722 {
2723 NdbRestarter restarter;
2724 int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2725 int kill[] = { 9999, 3000, 10000 };
2726 int result = NDBT_OK;
2727
2728 Vector<int> nodes;
2729
2730 /* choose first victim */
2731 nodes.push_back(restarter.getDbNodeId(rand() % restarter.getNumDbNodes()));
2732 /* select a node from different group as next victim */
2733 nodes.push_back(restarter.getRandomNodeOtherNodeGroup(nodes[0], rand()));
2734 for(int i = 0; i < 2; i++){
2735 g_info << "Killing node " << nodes[i] << "..." << endl;
2736 CHECK(restarter.dumpStateOneNode(nodes[i], val, 2) == 0);
2737 CHECK(restarter.dumpStateOneNode(nodes[i], kill, 3) == 0);
2738 }
2739
2740 /* wait for both of them to come into no start */
2741 if(restarter.waitNodesNoStart(nodes.getBase(), 2) != 0)
2742 {
2743 g_err << "Nodes never restarted" << endl;
2744 return NDBT_FAILED;
2745 }
2746
2747 /* start the killed nodes */
2748 if(restarter.startNodes(nodes.getBase(), 2) != 0)
2749 {
2750 g_err << "Unable to start killed node." << endl;
2751 return NDBT_FAILED;
2752 }
2753
2754 /* wait for nodes to get started */
2755 if(restarter.waitNodesStarted(nodes.getBase(), nodes.size()) != 0)
2756 {
2757 g_err << "Killed nodes stuck in start phase." << endl;
2758 return NDBT_FAILED;
2759 }
2760
2761 return result;
2762 }
2763
runRestartOneNode(NDBT_Context * ctx,NDBT_Step * step)2764 int runRestartOneNode(NDBT_Context* ctx, NDBT_Step* step){
2765 Ndb* pNdb = GETNDB(step);
2766 int result = NDBT_OK;
2767 int timeout = 300;
2768 int records = ctx->getNumRecords();
2769 int count;
2770 NdbRestarter restarter;
2771 const int nodeCount = restarter.getNumDbNodes();
2772 if(nodeCount < 2){
2773 g_info << "RestartOneNode - Needs atleast 2 nodes to test" << endl;
2774 return NDBT_OK;
2775 }
2776 Vector<int> nodeIds;
2777 for(int i = 0; i<nodeCount; i++)
2778 nodeIds.push_back(restarter.getDbNodeId(i));
2779 Uint32 currentRestartNodeIndex = 0;
2780 HugoTransactions hugoTrans(*ctx->getTab());
2781 int cnt = nodeCount;
2782 /**
2783 1. Load data
2784 2. One by one restart all nodes with -nostart
2785 3. Verify records
2786 **/
2787
2788 /*** 1 ***/
2789 g_info << "1- Loading Data " << endl;
2790 hugoTrans.loadTable(pNdb, records);
2791
2792 while(cnt-- && result != NDBT_FAILED)
2793 {
2794 /*** 2 ***/
2795 g_info << "2- Restarting node : " << nodeIds[currentRestartNodeIndex]<< endl;
2796
2797 CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
2798 false,//Initial
2799 true,//nostart
2800 false//abort
2801 ) == 0);
2802 CHECK(restarter.waitNodesNoStart(&nodeIds[currentRestartNodeIndex], 1, timeout) == 0);
2803 CHECK(restarter.startNodes(&nodeIds[currentRestartNodeIndex], 1) == 0);
2804 CHECK(restarter.waitNodesStarted(&nodeIds[currentRestartNodeIndex], 1, timeout) == 0);
2805 currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
2806 }
2807
2808 /*** 3 ***/
2809 ndbout << "3- Verifying records..." << endl;
2810 if(hugoTrans.selectCount(pNdb, 64, &count) )
2811 return NDBT_FAILED;
2812 if(hugoTrans.clearTable(pNdb))
2813 return NDBT_FAILED;
2814
2815 /*** done ***/
2816 g_info << "runRestartOneNode finished" << endl;
2817 return result;
2818 }
2819
runMixedModeRestart(NDBT_Context * ctx,NDBT_Step * step)2820 int runMixedModeRestart(NDBT_Context* ctx, NDBT_Step* step){
2821 int result = NDBT_OK;
2822 int timeout = 300;
2823 NdbRestarter restarter;
2824 const int nodeCount = restarter.getNumDbNodes();
2825 if(nodeCount < 4){
2826 g_info << "MixedModeRestart - Needs atleast 4 nodes to test" << endl;
2827 return NDBT_OK;
2828 }
2829 Vector<int> nodeIds;
2830 for(int i = 0; i<nodeCount; i++)
2831 nodeIds.push_back(restarter.getDbNodeId(i));
2832 int nodeToKill = nodeIds[0];
2833 int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2834 /**
2835 1. Killing two nodes of diffrent groups.
2836 2. Starting nodes with and without --initial option.
2837 **/
2838
2839 /*** 1 ***/
2840 g_info << "1- Killing two nodes..." << endl;
2841 int otherNodeToKill = restarter.getRandomNodeOtherNodeGroup(nodeToKill,rand());
2842 if(otherNodeToKill == -1)
2843 return NDBT_FAILED;
2844
2845 int kill[] = { 9999, 3000, 10000 };
2846
2847 g_info <<" Killing node : "<< nodeToKill << endl;
2848 if(restarter.dumpStateOneNode(nodeToKill, val, 2))
2849 return NDBT_FAILED;
2850 if(restarter.dumpStateOneNode(nodeToKill, kill, 3))
2851 return NDBT_FAILED;
2852
2853 g_info <<" Killing node : "<< otherNodeToKill << endl;
2854 if(restarter.dumpStateOneNode(otherNodeToKill, val, 2))
2855 return NDBT_FAILED;
2856 if(restarter.dumpStateOneNode(otherNodeToKill, kill, 3))
2857 return NDBT_FAILED;
2858
2859 /*** 2 ***/
2860 g_info << "2 - Starting nodes with and without --initial option..." << endl;
2861
2862 if(restarter.restartOneDbNode(nodeToKill,
2863 false,//Initial
2864 true,//nostart
2865 false//abort
2866 ))
2867 return NDBT_FAILED;
2868 if(restarter.waitNodesNoStart(&nodeToKill, 1, timeout))
2869 return NDBT_FAILED;
2870 if(restarter.startNodes(&nodeToKill, 1))
2871 return NDBT_FAILED;
2872 if(restarter.waitNodesStarted(&nodeToKill, 1, timeout))
2873 return NDBT_FAILED;
2874
2875 if(restarter.restartOneDbNode(otherNodeToKill,
2876 true,//Initial
2877 true,//nostart
2878 false//abort
2879 ))
2880 return NDBT_FAILED;
2881 if(restarter.waitNodesNoStart(&otherNodeToKill, 1, timeout))
2882 return NDBT_FAILED;
2883 if(restarter.startNodes(&otherNodeToKill, 1))
2884 return NDBT_FAILED;
2885 if(restarter.waitNodesStarted(&otherNodeToKill, 1, timeout))
2886 return NDBT_FAILED;
2887
2888 /*** done ***/
2889 g_info << "runMixedModeRestart finished" << endl;
2890 return result;
2891 }
2892
runStartWithNodeGroupZero(NDBT_Context * ctx,NDBT_Step * step)2893 int runStartWithNodeGroupZero(NDBT_Context* ctx, NDBT_Step* step){
2894 int result = NDBT_OK;
2895 int timeout = 300;
2896 NdbRestarter restarter;
2897 const int nodeCount = restarter.getNumDbNodes();
2898 if(nodeCount < 4){
2899 g_info << "StartWithNodeGroupZero - Needs atleast 4 nodes to test" << endl;
2900 return NDBT_OK;
2901 }
2902 Vector<int> nodeIds;
2903 for(int i = 0; i<nodeCount; i++)
2904 nodeIds.push_back(restarter.getDbNodeId(i));
2905 int nodeId = nodeIds[0];
2906 int cnt = nodeCount;
2907 int nodeGroup = 0;
2908 while(cnt-- && nodeGroup == 0 && result != NDBT_FAILED)
2909 {
2910 /**
2911 1. Finding a node of group id other then 0.
2912 2. Restart that node
2913 3. Check the group id of the above node
2914 **/
2915 /*** 1 ***/
2916 g_info << "1- Findind a node of group id other then 0" << endl;
2917 nodeGroup = restarter.getNodeGroup(nodeId);
2918 g_info << " Current node group : " << nodeGroup << endl;
2919 if(nodeGroup == 0)
2920 {
2921 g_info << " Skiping this node" << endl;
2922 nodeId = restarter.getRandomNodeOtherNodeGroup(nodeId, 4);
2923 continue;
2924 }
2925
2926 /*** 2 ***/
2927 g_info << "2- Restarting node : " << nodeId << " whose Group id is "
2928 << nodeGroup << endl;
2929
2930 CHECK(restarter.restartOneDbNode(nodeId,
2931 true,//Initial
2932 true,//nostart
2933 false//abort
2934 ) == 0);
2935 CHECK(restarter.waitNodesNoStart(&nodeId, 1, timeout) == 0);
2936 CHECK(restarter.startNodes(&nodeId, 1) == 0);
2937 CHECK(restarter.waitNodesStarted(&nodeId, 1, timeout) == 0);
2938 nodeGroup = restarter.getNodeGroup(nodeId);
2939 /*** 3 ***/
2940 g_info << "3- Checking its group id" << endl;
2941 CHECK(nodeGroup !=0)
2942 g_info << " current node group : " << nodeGroup << endl;
2943 }
2944
2945 /*** done ***/
2946 g_info << "runStartWithNodeGroupZero finished" << endl;
2947
2948 return result;
2949 }
2950
runMixedModeRestart4Node(NDBT_Context * ctx,NDBT_Step * step)2951 int runMixedModeRestart4Node(NDBT_Context* ctx, NDBT_Step* step){
2952 int result = NDBT_OK;
2953 NdbRestarter restarter;
2954 const int nodeCount = restarter.getNumDbNodes();
2955 if(nodeCount < 8){
2956 g_info << "MixedModeRestart4Node - Needs atleast 8 nodes to test" << endl;
2957 return NDBT_OK;
2958 }
2959 Vector<int> nodeIds;
2960 for(int i = 0; i<nodeCount; i++)
2961 nodeIds.push_back(restarter.getDbNodeId(i));
2962 int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2963 /**
2964 1. Killing four nodes of diffrent groups.
2965 2. Starting nodes with and without --initial option.
2966 **/
2967
2968 /*** 1 ***/
2969 g_info << "1- Killing four nodes of diffrent groups." << endl;
2970 int nodesarray[256];
2971 int cnt = 0;
2972 int timeout = 300;
2973 Bitmask<4> seen_groups;
2974 for(int i = 0; i< nodeCount; i++)
2975 {
2976 int nodeGroup=restarter.getNodeGroup(nodeIds[i]);
2977 if (seen_groups.get(nodeGroup))
2978 {
2979 // One node in this node group already down
2980 g_info << " Continuing as one node from this group is already killed."
2981 << " NodeGroup = " << nodeGroup << endl;
2982 continue;
2983 }
2984 seen_groups.set(nodeGroup);
2985 int kill[] = { 9999, 3000, 10000 };
2986 g_info <<" Killing node : "<< nodeIds[i] << endl;
2987 CHECK(restarter.dumpStateOneNode(nodeIds[i], val, 2) == 0);
2988 CHECK(restarter.dumpStateOneNode(nodeIds[i], kill, 3) == 0);
2989 nodesarray[cnt++] = nodeIds[i];
2990 }
2991
2992 /*** 2 ***/
2993 g_info << "2- Starting nodes with and without --initial option." << endl;
2994 bool flag = true;
2995 for(int i = 0; i < cnt; i++)
2996 {
2997 CHECK(restarter.restartOneDbNode(nodesarray[i],
2998 flag,//Initial
2999 true,//nostart
3000 false//abort
3001 ) == 0);
3002 CHECK(restarter.waitNodesNoStart(&nodesarray[i], 1, timeout) == 0);
3003 CHECK(restarter.startNodes(&nodesarray[i], 1) == 0);
3004 CHECK(restarter.waitNodesStarted(&nodesarray[i], 1, timeout) == 0);
3005 flag = false;
3006 }
3007
3008 /*** done ***/
3009 g_info << "runMixedModeRestart4Node finished" << endl;
3010 return result;
3011 }
3012
runKillMasterNodes(NDBT_Context * ctx,NDBT_Step * step)3013 int runKillMasterNodes(NDBT_Context* ctx, NDBT_Step* step){
3014 int result = NDBT_OK;
3015 NdbRestarter restarter;
3016 const int nodeCount = restarter.getNumDbNodes();
3017 if(nodeCount < 4){
3018 g_info << "KillMasterNodes - Needs atleast 4 nodes to test" << endl;
3019 return NDBT_OK;
3020 }
3021
3022 Vector<int> nodeIds;
3023 for(int i = 0; i<nodeCount; i++)
3024 nodeIds.push_back(restarter.getDbNodeId(i));
3025 int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
3026 int kill[] = { 9999, 3000, 10000 };
3027 /**
3028 1. Killing only master node one by one.
3029 2. Start nodes without --initial option.
3030 **/
3031
3032 /*** 1 ***/
3033 g_info << "1- Killing only master node one by one." << endl;
3034 int nodesarray[256];
3035 int timeout = 120;
3036 int cnt= 0;
3037 Bitmask<8> seen_groups;
3038 int master = restarter.getMasterNodeId();
3039 int newMaster;
3040 for(int i = 0; i< nodeCount; i++)
3041 {
3042 g_info << "Master Node Id : " << master << endl;
3043 int nodeGroup = restarter.getNodeGroup(master);
3044 CHECK(nodeGroup != -1);
3045 if (seen_groups.get(nodeGroup))
3046 {
3047 // One node in this node group already down
3048 g_info << "Breaking because master node belongs to the group whoes one"
3049 << "node is already down. Master = " << master << ", node Group = "
3050 << nodeGroup << endl;
3051 break;
3052 }
3053 seen_groups.set(nodeGroup);
3054 nodesarray[cnt++] = master;
3055 newMaster = restarter.getNextMasterNodeId(master);
3056 g_info <<" killing node : "<< master << " group : " << nodeGroup << endl;
3057 CHECK(restarter.dumpStateOneNode(master, val, 2) == 0);
3058 CHECK(restarter.dumpStateOneNode(master, kill, 3) == 0);
3059 CHECK(restarter.waitNodesNoStart(&master, 1) == 0);
3060 master = newMaster;
3061 }
3062
3063 /*** 2 ***/
3064 g_info << "2- Starting nodes without --initial option..." << endl;
3065 for(int i = 0; i<cnt; i++)
3066 {
3067 CHECK(restarter.startNodes(&nodesarray[i], 1) == 0);
3068 CHECK(restarter.waitNodesStarted(&nodesarray[i], 1, timeout) == 0);
3069 }
3070
3071 /*** done ***/
3072 g_info << "runKillMasterNodes finished" << endl;
3073 return result;
3074 }
3075
3076 NDBT_TESTSUITE(testSystemRestart);
3077 TESTCASE("SR1",
3078 "Basic system restart test. Focus on testing restart from REDO log.\n"
3079 "NOTE! Time between lcp's and gcp's should be left at default, \n"
3080 "so that Ndb uses the Redo log when restarting\n"
3081 "1. Load records\n"
3082 "2. Restart cluster and verify records \n"
3083 "3. Update records\n"
3084 "4. Restart cluster and verify records \n"
3085 "5. Delete half of the records \n"
3086 "6. Restart cluster and verify records \n"
3087 "7. Delete all records \n"
3088 "8. Restart cluster and verify records \n"
3089 "9. Insert, update, delete records \n"
3090 "10. Restart cluster and verify records\n"
3091 "11. Insert, update, delete records \n"
3092 "12. Restart cluster with error insert 5020 and verify records\n"){
3093 INITIALIZER(runWaitStarted);
3094 STEP(runSystemRestart1);
3095 }
3096 TESTCASE("SR2",
3097 "Basic system restart test. Focus on testing restart from LCP\n"
3098 "NOTE! Time between lcp's is automatically set to it's min value\n"
3099 "so that Ndb uses LCP's when restarting.\n"
3100 "1. Load records\n"
3101 "2. Restart cluster and verify records \n"
3102 "3. Update records\n"
3103 "4. Restart cluster and verify records \n"
3104 "5. Delete half of the records \n"
3105 "6. Restart cluster and verify records \n"
3106 "7. Delete all records \n"
3107 "8. Restart cluster and verify records \n"
3108 "9. Insert, update, delete records \n"
3109 "10. Restart cluster and verify records\n"){
3110 INITIALIZER(runWaitStarted);
3111 STEP(runSystemRestart2);
3112 }
3113 TESTCASE("SR_UNDO",
3114 "System restart test. Focus on testing of undologging\n"
3115 "in DBACC and DBTUP.\n"
3116 "This is done by starting a LCP, turn on undologging \n"
3117 "but don't start writing the datapages. This will force all\n"
3118 "operations to be written into the undolog.\n"
3119 "Then write datapages and complete LCP.\n"
3120 "Restart the system\n"){
3121 INITIALIZER(runWaitStarted);
3122 STEP(runSystemRestartTestUndoLog);
3123 }
3124 TESTCASE("SR_FULLDB",
3125 "System restart test. Test to restart when DB is full.\n"){
3126 INITIALIZER(runWaitStarted);
3127 STEP(runSystemRestartTestFullDb);
3128 }
3129 TESTCASE("SR3",
3130 "System restart test. Focus on testing restart from with\n"
3131 "not all nodes alive when system went down\n"
3132 "* 1. Load data\n"
3133 "* 2. Restart 1 node -nostart\n"
3134 "* 3. Update records\n"
3135 "* 4. Restart cluster and verify records\n"
3136 "* 5. Restart 1 node -nostart\n"
3137 "* 6. Delete half of the records\n"
3138 "* 7. Restart cluster and verify records\n"
3139 "* 8. Restart 1 node -nostart\n"
3140 "* 9. Delete all records\n"
3141 "* 10. Restart cluster and verify records\n"){
3142 INITIALIZER(runWaitStarted);
3143 STEP(runSystemRestart3);
3144 }
3145 TESTCASE("SR4",
3146 "System restart test. Focus on testing restart from with\n"
3147 "not all nodes alive when system went down but running LCP at\n"
3148 "high speed so that sometimes a TO is required to start cluster\n"
3149 "* 1. Load data\n"
3150 "* 2. Restart 1 node -nostart\n"
3151 "* 3. Update records\n"
3152 "* 4. Restart cluster and verify records\n"
3153 "* 5. Restart 1 node -nostart\n"
3154 "* 6. Delete half of the records\n"
3155 "* 7. Restart cluster and verify records\n"
3156 "* 8. Restart 1 node -nostart\n"
3157 "* 9. Delete all records\n"
3158 "* 10. Restart cluster and verify records\n"){
3159 INITIALIZER(runWaitStarted);
3160 STEP(runSystemRestart4);
3161 }
3162 TESTCASE("SR5",
3163 "As SR4 but making restart aborts\n"
3164 "* 1. Load data\n"
3165 "* 2. Restart 1 node -nostart\n"
3166 "* 3. Update records\n"
3167 "* 4. Restart cluster and verify records\n"
3168 "* 5. Restart 1 node -nostart\n"
3169 "* 6. Delete half of the records\n"
3170 "* 7. Restart cluster and verify records\n"
3171 "* 8. Restart 1 node -nostart\n"
3172 "* 9. Delete all records\n"
3173 "* 10. Restart cluster and verify records\n"){
3174 INITIALIZER(runWaitStarted);
3175 STEP(runSystemRestart5);
3176 }
3177 TESTCASE("SR6",
3178 "Perform system restart with some nodes having FS others wo/\n"
3179 "* 1. Load data\n"
3180 "* 2. Restart all node -nostart\n"
3181 "* 3. Restart some nodes -i -nostart\n"
3182 "* 4. Start all nodes verify records\n"){
3183 INITIALIZER(runWaitStarted);
3184 INITIALIZER(runClearTable);
3185 STEP(runSystemRestart6);
3186 }
3187 TESTCASE("SR7",
3188 "Perform partition win system restart\n"
3189 "* 1. Load data\n"
3190 "* 2. Restart all node -nostart\n"
3191 "* 3. Start all but one node\n"
3192 "* 4. Verify records\n"
3193 "* 5. Start last node\n"
3194 "* 6. Verify records\n"){
3195 INITIALIZER(runWaitStarted);
3196 INITIALIZER(runClearTable);
3197 STEP(runSystemRestart7);
3198 }
3199 TESTCASE("SR8",
3200 "Perform partition win system restart with other nodes delayed\n"
3201 "* 1. Load data\n"
3202 "* 2. Restart all node -nostart\n"
3203 "* 3. Start all but one node\n"
3204 "* 4. Wait for startphase >= 2\n"
3205 "* 5. Start last node\n"
3206 "* 6. Verify records\n"){
3207 INITIALIZER(runWaitStarted);
3208 INITIALIZER(runClearTable);
3209 STEP(runSystemRestart8);
3210 }
3211 TESTCASE("SR9",
3212 "Perform partition win system restart with other nodes delayed\n"
3213 "* 1. Start transaction\n"
3214 "* 2. insert (1,1)\n"
3215 "* 3. update (1,2)\n"
3216 "* 4. start lcp\n"
3217 "* 5. commit\n"
3218 "* 6. restart\n"){
3219 INITIALIZER(runWaitStarted);
3220 INITIALIZER(runClearTable);
3221 STEP(runSystemRestart9);
3222 }
3223 TESTCASE("SR10",
3224 "More tests of partitioned system restarts\n")
3225 {
3226 INITIALIZER(runWaitStarted);
3227 INITIALIZER(runClearTable);
3228 STEP(runSystemRestart10);
3229 }
3230 TESTCASE("Bug18385",
3231 "Perform partition system restart with other nodes with higher GCI"){
3232 INITIALIZER(runWaitStarted);
3233 INITIALIZER(runClearTable);
3234 STEP(runBug18385);
3235 }
3236 TESTCASE("Bug21536",
3237 "Perform partition system restart with other nodes with higher GCI"){
3238 INITIALIZER(runWaitStarted);
3239 INITIALIZER(runClearTable);
3240 STEP(runBug21536);
3241 }
3242 TESTCASE("Bug24664",
3243 "Check handling of LCP skip/keep")
3244 {
3245 INITIALIZER(runWaitStarted);
3246 INITIALIZER(runClearTable);
3247 STEP(runBug24664);
3248 }
3249 TESTCASE("Bug27434",
3250 "")
3251 {
3252 INITIALIZER(runWaitStarted);
3253 STEP(runBug27434);
3254 }
3255 TESTCASE("SR_DD_1", "")
3256 {
3257 TC_PROPERTY("ALL", 1);
3258 INITIALIZER(runWaitStarted);
3259 INITIALIZER(clearOldBackups);
3260 STEP(runStopper);
3261 STEP(runSR_DD_1);
3262 }
3263 TESTCASE("SR_DD_1b", "")
3264 {
3265 INITIALIZER(runWaitStarted);
3266 INITIALIZER(clearOldBackups);
3267 STEP(runSR_DD_1);
3268 }
3269 TESTCASE("SR_DD_1_LCP", "")
3270 {
3271 TC_PROPERTY("ALL", 1);
3272 TC_PROPERTY("LCP", 1);
3273 INITIALIZER(runWaitStarted);
3274 INITIALIZER(clearOldBackups);
3275 STEP(runStopper);
3276 STEP(runSR_DD_1);
3277 }
3278 TESTCASE("SR_DD_1b_LCP", "")
3279 {
3280 TC_PROPERTY("LCP", 1);
3281 INITIALIZER(runWaitStarted);
3282 INITIALIZER(clearOldBackups);
3283 STEP(runSR_DD_1);
3284 }
3285 TESTCASE("SR_DD_2", "")
3286 {
3287 TC_PROPERTY("ALL", 1);
3288 INITIALIZER(runWaitStarted);
3289 INITIALIZER(clearOldBackups);
3290 STEP(runStopper);
3291 STEP(runSR_DD_2);
3292 }
3293 TESTCASE("SR_DD_2b", "")
3294 {
3295 INITIALIZER(runWaitStarted);
3296 INITIALIZER(clearOldBackups);
3297 STEP(runSR_DD_2);
3298 }
3299 TESTCASE("SR_DD_2_LCP", "")
3300 {
3301 TC_PROPERTY("ALL", 1);
3302 TC_PROPERTY("LCP", 1);
3303 INITIALIZER(runWaitStarted);
3304 INITIALIZER(clearOldBackups);
3305 STEP(runStopper);
3306 STEP(runSR_DD_2);
3307 }
3308 TESTCASE("SR_DD_2b_LCP", "")
3309 {
3310 TC_PROPERTY("LCP", 1);
3311 INITIALIZER(runWaitStarted);
3312 INITIALIZER(clearOldBackups);
3313 STEP(runSR_DD_2);
3314 }
3315 TESTCASE("SR_DD_3", "")
3316 {
3317 TC_PROPERTY("ALL", 1);
3318 INITIALIZER(runWaitStarted);
3319 INITIALIZER(clearOldBackups);
3320 STEP(runStopper);
3321 STEP(runSR_DD_3);
3322 }
3323 TESTCASE("SR_DD_3b", "")
3324 {
3325 INITIALIZER(runWaitStarted);
3326 INITIALIZER(clearOldBackups);
3327 STEP(runSR_DD_3);
3328 }
3329 TESTCASE("SR_DD_3_LCP", "")
3330 {
3331 TC_PROPERTY("ALL", 1);
3332 TC_PROPERTY("LCP", 1);
3333 INITIALIZER(runWaitStarted);
3334 INITIALIZER(clearOldBackups);
3335 STEP(runStopper);
3336 STEP(runSR_DD_3);
3337 }
3338 TESTCASE("SR_DD_3b_LCP", "")
3339 {
3340 TC_PROPERTY("LCP", 1);
3341 INITIALIZER(runWaitStarted);
3342 INITIALIZER(clearOldBackups);
3343 STEP(runSR_DD_3);
3344 }
3345 TESTCASE("Bug29167", "")
3346 {
3347 INITIALIZER(runWaitStarted);
3348 STEP(runBug29167);
3349 }
3350 TESTCASE("Bug28770",
3351 "Check readTableFile1 fails, readTableFile2 succeeds\n"
3352 "1. Restart all node -nostart\n"
3353 "2. Insert error 6100 into all nodes\n"
3354 "3. Start all nodes\n"
3355 "4. Ensure cluster start\n"
3356 "5. Read and verify reocrds\n"
3357 "6. Repeat until looping is completed\n"){
3358 INITIALIZER(runWaitStarted);
3359 INITIALIZER(runClearTable);
3360 STEP(runBug28770);
3361 }
3362 TESTCASE("Bug22696", "")
3363 {
3364 INITIALIZER(runWaitStarted);
3365 INITIALIZER(runLoadTable);
3366 INITIALIZER(runBug22696);
3367 }
3368 TESTCASE("to", "Take-over during SR")
3369 {
3370 INITIALIZER(runWaitStarted);
3371 INITIALIZER(runLoadTable);
3372 INITIALIZER(runTO);
3373 }
3374 TESTCASE("basic", "")
3375 {
3376 INITIALIZER(runWaitStarted);
3377 INITIALIZER(runCreateAllTables);
3378 STEP(runBasic);
3379 FINALIZER(runDropAllTables);
3380 }
3381 TESTCASE("Bug41915", "")
3382 {
3383 TC_PROPERTY("ALL", 1);
3384 TC_PROPERTY("ERROR", 5053);
3385 TC_PROPERTY("ROWS", 30);
3386 INITIALIZER(runWaitStarted);
3387 STEP(runStopper);
3388 STEP(runSR_DD_2);
3389 }
3390 TESTCASE("Bug45154", "")
3391 {
3392 INITIALIZER(runBug45154);
3393 }
3394 TESTCASE("Bug46651", "")
3395 {
3396 INITIALIZER(runBug46651);
3397 }
3398 TESTCASE("Bug46412", "")
3399 {
3400 INITIALIZER(runBug46412);
3401 }
3402 TESTCASE("Bug48436", "")
3403 {
3404 INITIALIZER(runLoadTable);
3405 STEP(runBug48436);
3406 STEP(runScanUpdateUntilStopped);
3407 }
3408 TESTCASE("Bug54611", "")
3409 {
3410 INITIALIZER(runLoadTable);
3411 INITIALIZER(runBug54611);
3412 }
3413 TESTCASE("Bug56961", "")
3414 {
3415 INITIALIZER(runLoadTable);
3416 INITIALIZER(runBug56961);
3417 }
3418 TESTCASE("MTR_AddNodesAndRestart1",
3419 "1. Insert few rows to table"
3420 "2. Add nodes to the cluster"
3421 "3. Reorganize partition and optimize table"
3422 "Should be run only once")
3423 {
3424 ALL_TABLES();
3425 INITIALIZER(runWaitStarted);
3426 INITIALIZER(runFillTable);
3427 INITIALIZER(runAddNodes);
3428 STEP(runAlterTableAndOptimize);
3429 VERIFIER(runVerifyFilledTables);
3430 }
3431 TESTCASE("MTR_AddNodesAndRestart2",
3432 "1. Fill the table fully"
3433 "2. Add nodes to the cluster"
3434 "3. Reorganize partition and optimize table"
3435 "4. Kill 2 nodes during reorganization"
3436 "Should be run only once")
3437 {
3438 ALL_TABLES();
3439 TC_PROPERTY("NodesKilledDuringStep", true);
3440 INITIALIZER(runWaitStarted);
3441 INITIALIZER(runFillTable);
3442 INITIALIZER(runAddNodes);
3443 STEP(runAlterTableAndOptimize);
3444 STEP(runKillTwoNodes);
3445 VERIFIER(runVerifyFilledTables);
3446 }
3447 TESTCASE("RestartOneNode",
3448 "Perform one nodes restart\n"
3449 "* 1. Load data\n"
3450 "* 2. Restart 1 node\n"
3451 "* 3. Verify records\n"){
3452 INITIALIZER(runWaitStarted);
3453 STEP(runRestartOneNode);
3454 }
3455 TESTCASE("MixedModeRestart",
3456 "Perform kiiling of two node and starting them\n"
3457 "* 1. Killing two nodes of diffrent groups\n"
3458 "* 2. Starting nodes with and without --initial option\n"){
3459 INITIALIZER(runWaitStarted);
3460 STEP(runMixedModeRestart);
3461 }
3462 TESTCASE("StartWithNodeGroupZero",
3463 "check that a node doesn't always attached to group 0 while restart\n"
3464 "* 1. Finding a node of group id other then 0\n"
3465 "* 2. Restart that node\n"
3466 "* 3. Check the group id of the above node\n"){
3467 INITIALIZER(runWaitStarted);
3468 STEP(runStartWithNodeGroupZero);
3469 }
3470 TESTCASE("MixedModeRestart4Node",
3471 "Perform killing of four nodes and starting them\n"
3472 "* 1. Killing four nodes of diffrent groups\n"
3473 "* 2. Starting nodes with and without --initial option\n"){
3474 INITIALIZER(runWaitStarted);
3475 STEP(runMixedModeRestart4Node);
3476 }
3477 TESTCASE("KillMasterNodes",
3478 "perform Killing of master node and then starting them\n"
3479 "* 1. Killing only the master nodes one by one\n"
3480 "* 2. Start without --initial option\n"){
3481 INITIALIZER(runWaitStarted);
3482 STEP(runKillMasterNodes);
3483 }
3484 NDBT_TESTSUITE_END(testSystemRestart);
3485
main(int argc,const char ** argv)3486 int main(int argc, const char** argv){
3487 ndb_init();
3488 NDBT_TESTSUITE_INSTANCE(testSystemRestart);
3489 return testSystemRestart.execute(argc, argv);
3490 }
3491