/*
   Copyright (c) 2003, 2021, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
*/
24 
25 #include <NDBT.hpp>
26 #include <NDBT_Test.hpp>
27 #include <HugoTransactions.hpp>
28 #include <UtilTransactions.hpp>
29 #include <NdbRestarter.hpp>
30 #include <Vector.hpp>
31 #include <signaldata/DumpStateOrd.hpp>
32 #include <NdbBackup.hpp>
33 #include <Bitmask.hpp>
34 #include <DbUtil.hpp>
35 
runLoadTable(NDBT_Context * ctx,NDBT_Step * step)36 int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
37 
38   int records = ctx->getNumRecords();
39   HugoTransactions hugoTrans(*ctx->getTab());
40   if (hugoTrans.loadTable(GETNDB(step), records) != 0){
41     return NDBT_FAILED;
42   }
43   return NDBT_OK;
44 }
45 
runFillTable(NDBT_Context * ctx,NDBT_Step * step)46 int runFillTable(NDBT_Context* ctx, NDBT_Step* step){
47   Ndb* pNdb = GETNDB(step);
48   NdbDictionary::Table tab(*ctx->getTab());
49 
50   /* fill table until its full */
51   HugoTransactions hugoTrans(tab);
52   if(hugoTrans.fillTable(pNdb) != 0){
53     return NDBT_FAILED;
54   }
55 
56   /* store the number of rows */
57   int cnt;
58   UtilTransactions utilTrans(tab);
59   if(utilTrans.selectCount(pNdb, 0, &cnt) != 0){
60     g_err << "Select count failed." << endl;
61     return NDBT_FAILED;
62   }
63   ctx->setProperty("recordCount", cnt);
64   return NDBT_OK;
65 }
66 
runVerifyFilledTables(NDBT_Context * ctx,NDBT_Step * step)67 int runVerifyFilledTables(NDBT_Context* ctx, NDBT_Step* step)
68 {
69   /* verify the number of rows is intact */
70   Ndb* pNdb = GETNDB(step);
71   int countOld= ctx->getProperty("recordCount");
72   if (countOld == 0){
73     /* table was not filled using fillTable */
74     g_err << "Table initial row count not available" << endl;
75     return NDBT_FAILED;
76   }
77   /* ctx's tab gets invalidated in alter table reorganize partition
78           Hence reloading table again to verify */
79   const char *tableName= ctx->getTableName(0);
80   const NdbDictionary::Table* pTab =
81       NDBT_Table::discoverTableFromDb(pNdb, tableName);
82   if (pTab == NULL){
83     g_err << tableName << " was lost during the test." << endl;
84     return NDBT_FAILED;
85   }
86 
87   /* compare new record count with old */
88   int cnt;
89   UtilTransactions utilTrans(*pTab);
90   if(utilTrans.selectCount(pNdb, 0, &cnt) != 0){
91     g_err << "Select count failed." << endl;
92     return NDBT_FAILED;
93   }
94   if(cnt != countOld){
95     g_err << "Number of rows in result table different from expected" << endl;
96     return NDBT_FAILED;
97   }
98   return NDBT_OK;
99 }
100 
101 int
clearOldBackups(NDBT_Context * ctx,NDBT_Step * step)102 clearOldBackups(NDBT_Context* ctx, NDBT_Step* step)
103 {
104   NdbBackup backup;
105   backup.clearOldBackups();
106   return NDBT_OK;
107 }
108 
/**
 * CHECK(b): verify a test condition inside a test loop.
 *
 * When `b` is false this logs the step name and source line to g_err,
 * sets `result` to NDBT_FAILED and executes `continue`.
 *
 * Requirements on the expansion site:
 *  - `step` (with getName()) and an assignable `result` must be in scope;
 *  - it must be used inside a loop whose condition tests `result`, since
 *    the `continue` is what ends the current iteration.
 *
 * The macro is intentionally a bare `if` and not wrapped in
 * do { } while (0): inside such a wrapper `continue` would apply to the
 * wrapper loop instead of the caller's loop.
 */
#define CHECK(b) \
  if (!(b)) { \
    g_err << "ERR: " << step->getName() \
          << " failed on line " << __LINE__ << endl; \
    result = NDBT_FAILED; \
    continue; \
  }
114 
runSystemRestart1(NDBT_Context * ctx,NDBT_Step * step)115 int runSystemRestart1(NDBT_Context* ctx, NDBT_Step* step){
116   Ndb* pNdb = GETNDB(step);
117   int result = NDBT_OK;
118   int timeout = 300;
119   Uint32 loops = ctx->getNumLoops();
120   int records = ctx->getNumRecords();
121   int count;
122   NdbRestarter restarter;
123   Uint32 i = 1;
124 
125   UtilTransactions utilTrans(*ctx->getTab());
126   HugoTransactions hugoTrans(*ctx->getTab());
127   while(i<=loops && result != NDBT_FAILED){
128 
129     ndbout << "Loop " << i << "/"<< loops <<" started" << endl;
130     /*
131       1. Load data
132       2. Restart cluster and verify records
133       3. Update records
134       4. Restart cluster and verify records
135       5. Delete half of the records
136       6. Restart cluster and verify records
137       7. Delete all records
138       8. Restart cluster and verify records
139       9. Insert, update, delete records
140       10. Restart cluster and verify records
141       11. Insert, update, delete records
142       12. Restart cluster with error insert 5020 and verify records
143     */
144     ndbout << "Loading records..." << endl;
145     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
146 
147     ndbout << "Restarting cluster" << endl;
148     CHECK(restarter.restartAll() == 0);
149     CHECK(restarter.waitClusterStarted(timeout) == 0);
150     CHECK(pNdb->waitUntilReady(timeout) == 0);
151 
152     ndbout << "Verifying records..." << endl;
153     CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
154     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
155     CHECK(count == records);
156 
157     ndbout << "Updating records..." << endl;
158     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
159 
160     ndbout << "Restarting cluster..." << endl;
161     CHECK(restarter.restartAll() == 0);
162     CHECK(restarter.waitClusterStarted(timeout) == 0);
163     CHECK(pNdb->waitUntilReady(timeout) == 0);
164 
165     ndbout << "Verifying records..." << endl;
166     CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
167     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
168     CHECK(count == records);
169 
170     ndbout << "Deleting 50% of records..." << endl;
171     CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
172 
173     ndbout << "Restarting cluster..." << endl;
174     CHECK(restarter.restartAll() == 0);
175     CHECK(restarter.waitClusterStarted(timeout) == 0);
176     CHECK(pNdb->waitUntilReady(timeout) == 0);
177 
178     ndbout << "Verifying records..." << endl;
179     CHECK(hugoTrans.scanReadRecords(pNdb, records/2, 0, 64) == 0);
180     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
181     CHECK(count == (records/2));
182 
183     ndbout << "Deleting all records..." << endl;
184     CHECK(utilTrans.clearTable(pNdb, records/2) == 0);
185 
186     ndbout << "Restarting cluster..." << endl;
187     CHECK(restarter.restartAll() == 0);
188     CHECK(restarter.waitClusterStarted(timeout) == 0);
189     CHECK(pNdb->waitUntilReady(timeout) == 0);
190 
191     ndbout << "Verifying records..." << endl;
192     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
193     CHECK(count == 0);
194 
195     ndbout << "Doing it all..." << endl;
196     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
197     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
198     CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
199     CHECK(hugoTrans.scanUpdateRecords(pNdb, records/2) == 0);
200     CHECK(utilTrans.clearTable(pNdb, records) == 0);
201     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
202     CHECK(utilTrans.clearTable(pNdb, records) == 0);
203     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
204     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
205     CHECK(utilTrans.clearTable(pNdb, records) == 0);
206 
207     ndbout << "Restarting cluster..." << endl;
208     CHECK(restarter.restartAll() == 0);
209     CHECK(restarter.waitClusterStarted(timeout) == 0);
210     CHECK(pNdb->waitUntilReady(timeout) == 0);
211 
212     ndbout << "Verifying records..." << endl;
213     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
214     CHECK(count == 0);
215 
216     ndbout << "Doing it all..." << endl;
217     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
218     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
219     CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
220     CHECK(hugoTrans.scanUpdateRecords(pNdb, records/2) == 0);
221     CHECK(utilTrans.clearTable(pNdb, records) == 0);
222     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
223     CHECK(utilTrans.clearTable(pNdb, records) == 0);
224 
225     ndbout << "Restarting cluster with error insert 5020..." << endl;
226     CHECK(restarter.restartAll(false, true) == 0);
227     CHECK(restarter.waitClusterNoStart(timeout) == 0);
228     CHECK(restarter.insertErrorInAllNodes(5020) == 0);
229     CHECK(restarter.startAll() == 0);
230     CHECK(restarter.waitClusterStarted(timeout) == 0);
231     CHECK(pNdb->waitUntilReady(timeout) == 0);
232 
233     i++;
234   }
235 
236   ndbout << "runSystemRestart1 finished" << endl;
237 
238   return result;
239 }
240 
runSystemRestart2(NDBT_Context * ctx,NDBT_Step * step)241 int runSystemRestart2(NDBT_Context* ctx, NDBT_Step* step){
242   Ndb* pNdb = GETNDB(step);
243   int result = NDBT_OK;
244 ///  int timeout = 300;
245   int timeout = 120;
246   Uint32 loops = ctx->getNumLoops();
247   int records = ctx->getNumRecords();
248   int count;
249   NdbRestarter restarter;
250   Uint32 i = 1;
251 
252   UtilTransactions utilTrans(*ctx->getTab());
253   HugoTransactions hugoTrans(*ctx->getTab());
254   while(i<=loops && result != NDBT_FAILED && !ctx->isTestStopped()){
255 
256     ndbout << "Loop " << i << "/"<< loops <<" started" << endl;
257     /* Use error 7070 to set time between LCP to it's min value
258       1. Load data
259       2. Restart cluster and verify records
260       3. Update records
261       4. Restart cluster and verify records
262       5. Delete half of the records
263       6. Restart cluster and verify records
264       7. Delete all records
265       8. Restart cluster and verify records
266       9. Insert, update, delete records
267       10. Restart cluster and verify records
268     */
269     int val = DumpStateOrd::DihMinTimeBetweenLCP;
270     CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
271 
272     ndbout << "Loading records..." << endl;
273     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
274 
275     ndbout << "Restarting cluster" << endl;
276     CHECK(restarter.restartAll() == 0);
277     CHECK(restarter.waitClusterStarted(timeout) == 0);
278     {
279       int val = DumpStateOrd::DihMinTimeBetweenLCP;
280       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
281     }
282     CHECK(pNdb->waitUntilReady(timeout) == 0);
283 
284     ndbout << "Verifying records..." << endl;
285     CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
286     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
287     CHECK(count == records);
288 
289     ndbout << "Updating records..." << endl;
290     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
291 
292     ndbout << "Restarting cluster..." << endl;
293     CHECK(restarter.restartAll() == 0);
294     CHECK(restarter.waitClusterStarted(timeout) == 0);
295     {
296       int val = DumpStateOrd::DihMinTimeBetweenLCP;
297       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
298     }
299     CHECK(pNdb->waitUntilReady(timeout) == 0);
300 
301     ndbout << "Verifying records..." << endl;
302     CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
303     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
304     CHECK(count == records);
305 
306     ndbout << "Deleting 50% of records..." << endl;
307     CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
308 
309     ndbout << "Restarting cluster..." << endl;
310     CHECK(restarter.restartAll() == 0);
311     CHECK(restarter.waitClusterStarted(timeout) == 0);
312     {
313       int val = DumpStateOrd::DihMinTimeBetweenLCP;
314       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
315     }
316     CHECK(pNdb->waitUntilReady(timeout) == 0);
317 
318     ndbout << "Verifying records..." << endl;
319     CHECK(hugoTrans.scanReadRecords(pNdb, records/2, 0, 64) == 0);
320     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
321     CHECK(count == (records/2));
322 
323     ndbout << "Deleting all records..." << endl;
324     CHECK(utilTrans.clearTable(pNdb, records/2) == 0);
325 
326     ndbout << "Restarting cluster..." << endl;
327     CHECK(restarter.restartAll() == 0);
328     CHECK(restarter.waitClusterStarted(timeout) == 0);
329     {
330       int val = DumpStateOrd::DihMinTimeBetweenLCP;
331       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
332     }
333     CHECK(pNdb->waitUntilReady(timeout) == 0);
334 
335     ndbout << "Verifying records..." << endl;
336     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
337     CHECK(count == 0);
338 
339     ndbout << "Doing it all..." << endl;
340     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
341     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
342     CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
343     CHECK(hugoTrans.scanUpdateRecords(pNdb, records/2) == 0);
344     CHECK(utilTrans.clearTable(pNdb, records) == 0);
345     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
346     CHECK(utilTrans.clearTable(pNdb, records) == 0);
347     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
348     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
349     CHECK(utilTrans.clearTable(pNdb, records) == 0);
350 
351     ndbout << "Restarting cluster..." << endl;
352     CHECK(restarter.restartAll() == 0);
353     CHECK(restarter.waitClusterStarted(timeout) == 0);
354     {
355       int val = DumpStateOrd::DihMinTimeBetweenLCP;
356       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
357     }
358     CHECK(pNdb->waitUntilReady(timeout) == 0);
359 
360     ndbout << "Verifying records..." << endl;
361     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
362     CHECK(count == 0);
363 
364     i++;
365   }
366 
367   ndbout << "runSystemRestart2 finished" << endl;
368 
369   return result;
370 }
371 
runSystemRestartTestUndoLog(NDBT_Context * ctx,NDBT_Step * step)372 int runSystemRestartTestUndoLog(NDBT_Context* ctx, NDBT_Step* step){
373   Ndb* pNdb = GETNDB(step);
374   int result = NDBT_OK;
375   int timeout = 300;
376   Uint32 loops = ctx->getNumLoops();
377   int records = ctx->getNumRecords();
378   int count;
379   NdbRestarter restarter;
380   Uint32 i = 1;
381 
382   int dump7080[2];
383   dump7080[0] = 7080;
384   dump7080[1] = ctx->getTab()->getTableId();
385 
386   UtilTransactions utilTrans(*ctx->getTab());
387   HugoTransactions hugoTrans(*ctx->getTab());
388   while(i<=loops && result != NDBT_FAILED){
389 
390     ndbout << "Loop " << i << "/"<< loops <<" started" << endl;
391     /*
392       1. Start LCP, turn on undologging but delay write of datapages.
393       2. Insert, update, delete records
394       3. Complete writing of data pages and finish LCP.
395       4. Restart cluster and verify records
396     */
397     // Use dump state 7080 to delay writing of datapages
398     // for the current table
399     ndbout << "Dump state: "<<dump7080[0]<<", "<<dump7080[1]<<endl;
400     CHECK(restarter.dumpStateAllNodes(dump7080, 2) == 0);
401     NdbSleep_SecSleep(10);
402 
403     ndbout << "Doing it all..." << endl;
404     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
405     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
406     CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
407     CHECK(hugoTrans.scanUpdateRecords(pNdb, records/2) == 0);
408     CHECK(utilTrans.clearTable(pNdb, records) == 0);
409     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
410     CHECK(utilTrans.clearTable(pNdb, records) == 0);
411 
412     // Reset error and let LCP continue
413     CHECK(restarter.insertErrorInAllNodes(0) == 0);
414     NdbSleep_SecSleep(60);
415 
416     ndbout << "Restarting cluster..." << endl;
417     CHECK(restarter.restartAll() == 0);
418     CHECK(restarter.waitClusterStarted(timeout) == 0);
419     CHECK(pNdb->waitUntilReady(timeout) == 0);
420 
421     ndbout << "Verifying records..." << endl;
422     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
423     CHECK(count == 0);
424 
425     // Use dump state 7080 to delay writing of datapages
426     // for the current table
427     ndbout << "Dump state: "<<dump7080[0]<<", "<<dump7080[1]<<endl;
428     CHECK(restarter.dumpStateAllNodes(dump7080, 2) == 0);
429     NdbSleep_SecSleep(10);
430 
431     ndbout << "Doing it all, delete 50%..." << endl;
432     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
433     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
434     CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
435 
436     // Reset error and let LCP continue
437     CHECK(restarter.insertErrorInAllNodes(0) == 0);
438     NdbSleep_SecSleep(20);
439 
440     ndbout << "Restarting cluster..." << endl;
441     CHECK(restarter.restartAll() == 0);
442     CHECK(restarter.waitClusterStarted(timeout) == 0);
443     CHECK(pNdb->waitUntilReady(timeout) == 0);
444 
445     ndbout << "Verifying records..." << endl;
446     CHECK(hugoTrans.scanReadRecords(pNdb, records/2, 0, 64) == 0);
447     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
448     CHECK(count == (records/2));
449     CHECK(utilTrans.clearTable(pNdb, records) == 0);
450 
451     i++;
452   }
453 
454   ndbout << "runSystemRestartTestUndoLog finished" << endl;
455 
456   return result;
457 }
458 
runSystemRestartTestFullDb(NDBT_Context * ctx,NDBT_Step * step)459 int runSystemRestartTestFullDb(NDBT_Context* ctx, NDBT_Step* step){
460   Ndb* pNdb = GETNDB(step);
461   int result = NDBT_OK;
462   int timeout = 300;
463   Uint32 loops = ctx->getNumLoops();
464   int count1, count2;
465   NdbRestarter restarter;
466   Uint32 i = 1;
467 
468   UtilTransactions utilTrans(*ctx->getTab());
469   HugoTransactions hugoTrans(*ctx->getTab());
470   while(i<=loops && result != NDBT_FAILED){
471 
472     ndbout << "Loop " << i << "/"<< loops <<" started" << endl;
473     /*
474       1. Load data until db reports it's full
475       2. Restart cluster and verify records
476     */
477     ndbout << "Filling up table..." << endl;
478     CHECK(hugoTrans.fillTable(pNdb) == 0);
479     CHECK(utilTrans.selectCount(pNdb, 64, &count1) == 0);
480     ndbout << "Db is full. Table has "<<count1 <<" records."<< endl;
481 
482     ndbout << "Restarting cluster" << endl;
483     CHECK(restarter.restartAll() == 0);
484     CHECK(restarter.waitClusterStarted(timeout) == 0);
485     CHECK(pNdb->waitUntilReady(timeout) == 0);
486 
487     ndbout << "Verifying records..." << endl;
488     CHECK(hugoTrans.scanReadRecords(pNdb, count1) == 0);
489     CHECK(utilTrans.selectCount(pNdb, 64, &count2) == 0);
490     CHECK(count1 == count2);
491 
492     ndbout << "Deleting all records..." << endl;
493     CHECK(utilTrans.clearTable2(pNdb, count1) == 0);
494 
495     ndbout << "Restarting cluster..." << endl;
496     CHECK(restarter.restartAll() == 0);
497     CHECK(restarter.waitClusterStarted(timeout) == 0);
498     CHECK(pNdb->waitUntilReady(timeout) == 0);
499 
500     ndbout << "Verifying records..." << endl;
501     CHECK(utilTrans.selectCount(pNdb, 64, &count1) == 0);
502     CHECK(count1 == 0);
503 
504     i++;
505   }
506 
507   ndbout << "runSystemRestartTestFullDb finished" << endl;
508 
509   return result;
510 }
511 
runSystemRestart3(NDBT_Context * ctx,NDBT_Step * step)512 int runSystemRestart3(NDBT_Context* ctx, NDBT_Step* step){
513   Ndb* pNdb = GETNDB(step);
514   int result = NDBT_OK;
515   int timeout = 300;
516   Uint32 loops = ctx->getNumLoops();
517   int records = ctx->getNumRecords();
518   int count;
519   NdbRestarter restarter;
520   Uint32 i = 1;
521 
522   const Uint32 nodeCount = restarter.getNumDbNodes();
523   if(nodeCount < 2){
524     g_info << "SR3 - Needs atleast 2 nodes to test" << endl;
525     return NDBT_OK;
526   }
527 
528   Vector<int> nodeIds;
529   for(i = 0; i<nodeCount; i++)
530     nodeIds.push_back(restarter.getDbNodeId(i));
531 
532   Uint32 currentRestartNodeIndex = 0;
533   UtilTransactions utilTrans(*ctx->getTab());
534   HugoTransactions hugoTrans(*ctx->getTab());
535 
536   while(i<=loops && result != NDBT_FAILED){
537 
538     g_info << "Loop " << i << "/"<< loops <<" started" << endl;
539     /**
540      * 1. Load data
541      * 2. Restart 1 node -nostart
542      * 3. Update records
543      * 4. Restart cluster and verify records
544      * 5. Restart 1 node -nostart
545      * 6. Delete half of the records
546      * 7. Restart cluster and verify records
547      * 8. Restart 1 node -nostart
548      * 9. Delete all records
549      * 10. Restart cluster and verify records
550      */
551     g_info << "Loading records..." << endl;
552     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
553 
554     /*** 1 ***/
555     g_info << "1 - Stopping one node" << endl;
556     CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
557 				     false,
558 				     true,
559 				     false) == 0);
560     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
561 
562     g_info << "Updating records..." << endl;
563     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
564 
565     g_info << "Restarting cluster..." << endl;
566     CHECK(restarter.restartAll() == 0);
567     CHECK(restarter.waitClusterStarted(timeout) == 0);
568     CHECK(pNdb->waitUntilReady(timeout) == 0);
569 
570     g_info << "Verifying records..." << endl;
571     CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
572     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
573     CHECK(count == records);
574 
575     g_info << "2 - Stopping one node" << endl;
576     CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
577 				     false,
578 				     true,
579 				     false) == 0);
580     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
581 
582     g_info << "Deleting 50% of records..." << endl;
583     CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
584 
585     g_info << "Restarting cluster..." << endl;
586     CHECK(restarter.restartAll() == 0);
587     CHECK(restarter.waitClusterStarted(timeout) == 0);
588     CHECK(pNdb->waitUntilReady(timeout) == 0);
589 
590     g_info << "Verifying records..." << endl;
591     CHECK(hugoTrans.scanReadRecords(pNdb, records/2, 0, 64) == 0);
592     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
593     CHECK(count == (records/2));
594 
595     g_info << "3 - Stopping one node" << endl;
596     CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
597 				     false,
598 				     true,
599 				     false) == 0);
600     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
601     g_info << "Deleting all records..." << endl;
602     CHECK(utilTrans.clearTable(pNdb, records/2) == 0);
603 
604     g_info << "Restarting cluster..." << endl;
605     CHECK(restarter.restartAll() == 0);
606     CHECK(restarter.waitClusterStarted(timeout) == 0);
607     CHECK(pNdb->waitUntilReady(timeout) == 0);
608 
609     ndbout << "Verifying records..." << endl;
610     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
611     CHECK(count == 0);
612 
613     i++;
614   }
615 
616   g_info << "runSystemRestart3 finished" << endl;
617 
618   return result;
619 }
620 
runSystemRestart4(NDBT_Context * ctx,NDBT_Step * step)621 int runSystemRestart4(NDBT_Context* ctx, NDBT_Step* step){
622   Ndb* pNdb = GETNDB(step);
623   int result = NDBT_OK;
624   int timeout = 300;
625   Uint32 loops = ctx->getNumLoops();
626   int records = ctx->getNumRecords();
627   int count;
628   NdbRestarter restarter;
629   Uint32 i = 1;
630 
631   const Uint32 nodeCount = restarter.getNumDbNodes();
632   if(nodeCount < 2){
633     g_info << "SR4 - Needs atleast 2 nodes to test" << endl;
634     return NDBT_OK;
635   }
636 
637   Vector<int> nodeIds;
638   for(i = 0; i<nodeCount; i++)
639     nodeIds.push_back(restarter.getDbNodeId(i));
640 
641   Uint32 currentRestartNodeIndex = 0;
642   UtilTransactions utilTrans(*ctx->getTab());
643   HugoTransactions hugoTrans(*ctx->getTab());
644 
645   {
646     int val = DumpStateOrd::DihMinTimeBetweenLCP;
647     if(restarter.dumpStateAllNodes(&val, 1) != 0){
648       g_err << "ERR: "<< step->getName()
649 	    << " failed on line " << __LINE__ << endl;
650       return NDBT_FAILED;
651     }
652   }
653 
654   while(i<=loops && result != NDBT_FAILED){
655 
656     g_info << "Loop " << i << "/"<< loops <<" started" << endl;
657     /**
658      * 1. Load data
659      * 2. Restart 1 node -nostart
660      * 3. Update records
661      * 4. Restart cluster and verify records
662      * 5. Restart 1 node -nostart
663      * 6. Delete half of the records
664      * 7. Restart cluster and verify records
665      * 8. Restart 1 node -nostart
666      * 9. Delete all records
667      * 10. Restart cluster and verify records
668      */
669     g_info << "Loading records..." << endl;
670     CHECK(hugoTrans.loadTable(pNdb, records) == 0);
671 
672     /*** 1 ***/
673     g_info << "1 - Stopping one node" << endl;
674     CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
675 				     false,
676 				     true,
677 				     false) == 0);
678     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
679 
680     g_info << "Updating records..." << endl;
681     CHECK(hugoTrans.pkUpdateRecords(pNdb, records) == 0);
682 
683     g_info << "Restarting cluster..." << endl;
684     CHECK(restarter.restartAll() == 0);
685     CHECK(restarter.waitClusterStarted(timeout) == 0);
686     {
687       int val = DumpStateOrd::DihMinTimeBetweenLCP;
688       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
689     }
690     CHECK(pNdb->waitUntilReady(timeout) == 0);
691 
692     g_info << "Verifying records..." << endl;
693     CHECK(hugoTrans.pkReadRecords(pNdb, records) == 0);
694     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
695     CHECK(count == records);
696 
697     g_info << "2 - Stopping one node" << endl;
698     CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
699 				     false,
700 				     true,
701 				     false) == 0);
702     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
703 
704     g_info << "Deleting 50% of records..." << endl;
705     CHECK(hugoTrans.pkDelRecords(pNdb, records/2) == 0);
706 
707     g_info << "Restarting cluster..." << endl;
708     CHECK(restarter.restartAll() == 0);
709     CHECK(restarter.waitClusterStarted(timeout) == 0);
710     {
711       int val = DumpStateOrd::DihMinTimeBetweenLCP;
712       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
713     }
714     CHECK(pNdb->waitUntilReady(timeout) == 0);
715 
716     g_info << "Verifying records..." << endl;
717     CHECK(hugoTrans.scanReadRecords(pNdb, records/2, 0, 64) == 0);
718     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
719     CHECK(count == (records/2));
720 
721     g_info << "3 - Stopping one node" << endl;
722     CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
723 				     false,
724 				     true,
725 				     false) == 0);
726     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
727     g_info << "Deleting all records..." << endl;
728     CHECK(utilTrans.clearTable(pNdb, records/2) == 0);
729 
730     g_info << "Restarting cluster..." << endl;
731     CHECK(restarter.restartAll() == 0);
732     CHECK(restarter.waitClusterStarted(timeout) == 0);
733     {
734       int val = DumpStateOrd::DihMinTimeBetweenLCP;
735       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
736     }
737     CHECK(pNdb->waitUntilReady(timeout) == 0);
738 
739     ndbout << "Verifying records..." << endl;
740     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
741     CHECK(count == 0);
742 
743     i++;
744   }
745 
746   g_info << "runSystemRestart4 finished" << endl;
747 
748   return result;
749 }
750 
runSystemRestart5(NDBT_Context * ctx,NDBT_Step * step)751 int runSystemRestart5(NDBT_Context* ctx, NDBT_Step* step){
752   Ndb* pNdb = GETNDB(step);
753   int result = NDBT_OK;
754   int timeout = 300;
755   Uint32 loops = ctx->getNumLoops();
756   int records = ctx->getNumRecords();
757   int count;
758   NdbRestarter restarter;
759   Uint32 i = 1;
760 
761   const Uint32 nodeCount = restarter.getNumDbNodes();
762   if(nodeCount < 2){
763     g_info << "SR5 - Needs atleast 2 nodes to test" << endl;
764     return NDBT_OK;
765   }
766 
767   Vector<int> nodeIds;
768   for(i = 0; i<nodeCount; i++)
769     nodeIds.push_back(restarter.getDbNodeId(i));
770 
771   Uint32 currentRestartNodeIndex = 0;
772   UtilTransactions utilTrans(*ctx->getTab());
773   HugoTransactions hugoTrans(*ctx->getTab());
774 
775   {
776     int val = DumpStateOrd::DihMinTimeBetweenLCP;
777     if(restarter.dumpStateAllNodes(&val, 1) != 0){
778       g_err << "ERR: "<< step->getName()
779 	    << " failed on line " << __LINE__ << endl;
780       return NDBT_FAILED;
781     }
782   }
783 
784   while(i<=loops && result != NDBT_FAILED){
785 
786     g_info << "Loop " << i << "/"<< loops <<" started" << endl;
787     /**
788      * 1. Load data
789      * 2. Restart 1 node -nostart
790      * 3. Update records
791      * 4. Restart cluster and verify records
792      * 5. Restart 1 node -nostart
793      * 6. Delete half of the records
794      * 7. Restart cluster and verify records
795      * 8. Restart 1 node -nostart
796      * 9. Delete all records
797      * 10. Restart cluster and verify records
798      */
799     g_info << "Loading records..." << endl;
800     hugoTrans.loadTable(pNdb, records);
801 
802     /*** 1 ***/
803     g_info << "1 - Stopping one node" << endl;
804     CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
805 				     false,
806 				     true,
807 				     false) == 0);
808     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
809 
810     g_info << "Updating records..." << endl;
811     hugoTrans.pkUpdateRecords(pNdb, records);
812 
813     g_info << "Restarting cluster..." << endl;
814     CHECK(restarter.restartAll(false, false, true) == 0);
815     CHECK(restarter.waitClusterStarted(timeout) == 0);
816     {
817       int val = DumpStateOrd::DihMinTimeBetweenLCP;
818       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
819     }
820     CHECK(pNdb->waitUntilReady(timeout) == 0);
821 
822     g_info << "Verifying records..." << endl;
823     hugoTrans.pkReadRecords(pNdb, records);
824     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
825     //CHECK(count == records);
826 
827     g_info << "2 - Stopping one node" << endl;
828     CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
829 				     false,
830 				     true,
831 				     false) == 0);
832     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
833 
834     g_info << "Deleting 50% of records..." << endl;
835     hugoTrans.pkDelRecords(pNdb, records/2);
836 
837     g_info << "Restarting cluster..." << endl;
838     CHECK(restarter.restartAll(false, false, true) == 0);
839     CHECK(restarter.waitClusterStarted(timeout) == 0);
840     {
841       int val = DumpStateOrd::DihMinTimeBetweenLCP;
842       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
843     }
844     CHECK(pNdb->waitUntilReady(timeout) == 0);
845 
846     g_info << "Verifying records..." << endl;
847     hugoTrans.scanReadRecords(pNdb, records/2, 0, 64);
848     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
849     //CHECK(count == (records/2));
850 
851     g_info << "3 - Stopping one node" << endl;
852     CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
853 				     false,
854 				     true,
855 				     false) == 0);
856     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
857     g_info << "Deleting all records..." << endl;
858     utilTrans.clearTable(pNdb, records/2);
859 
860     g_info << "Restarting cluster..." << endl;
861     CHECK(restarter.restartAll(false, false, true) == 0);
862     CHECK(restarter.waitClusterStarted(timeout) == 0);
863     {
864       int val = DumpStateOrd::DihMinTimeBetweenLCP;
865       CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
866     }
867     CHECK(pNdb->waitUntilReady(timeout) == 0);
868 
869     ndbout << "Verifying records..." << endl;
870     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
871     //CHECK(count == 0);
872 
873     CHECK(utilTrans.clearTable(pNdb) == 0);
874     i++;
875   }
876 
877   g_info << "runSystemRestart5 finished" << endl;
878 
879   return result;
880 }
881 
runSystemRestart6(NDBT_Context * ctx,NDBT_Step * step)882 int runSystemRestart6(NDBT_Context* ctx, NDBT_Step* step){
883   Ndb* pNdb = GETNDB(step);
884   int result = NDBT_OK;
885   int timeout = 300;
886   Uint32 loops = ctx->getNumLoops();
887   int records = ctx->getNumRecords();
888   NdbRestarter restarter;
889   Uint32 i = 1;
890 
891   const Uint32 nodeCount = restarter.getNumDbNodes();
892   if(nodeCount < 2){
893     g_info << "SR6 - Needs atleast 2 nodes to test" << endl;
894     return NDBT_OK;
895   }
896 
897   Vector<int> nodeIds;
898   for(i = 0; i<nodeCount; i++)
899     nodeIds.push_back(restarter.getDbNodeId(i));
900 
901   Uint32 currentRestartNodeIndex = 0;
902   UtilTransactions utilTrans(*ctx->getTab());
903   HugoTransactions hugoTrans(*ctx->getTab());
904 
905   while(i<=loops && result != NDBT_FAILED){
906 
907     g_info << "Loop " << i << "/"<< loops <<" started" << endl;
908     /**
909      * 1. Load data
910      * 2. Restart all node -nostart
911      * 3. Restart some nodes -i -nostart
912      * 4. Start all nodes verify records
913      */
914     g_info << "Loading records..." << endl;
915     hugoTrans.loadTable(pNdb, records);
916 
917     CHECK(restarter.restartAll(false, true, false) == 0);
918 
919     Uint32 nodeId = nodeIds[currentRestartNodeIndex];
920     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
921 
922     CHECK(restarter.restartOneDbNode(nodeId, true, true,false) == 0);
923     CHECK(restarter.waitClusterNoStart(timeout) == 0);
924     CHECK(restarter.startAll() == 0);
925     CHECK(restarter.waitClusterStarted(timeout) == 0);
926     CHECK(pNdb->waitUntilReady(timeout) == 0);
927     int count = records - 1;
928     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
929     CHECK(count == records);
930     CHECK(utilTrans.clearTable(pNdb) == 0);
931     i++;
932   }
933 
934   g_info << "runSystemRestart6 finished" << endl;
935 
936   return result;
937 }
938 
runSystemRestart7(NDBT_Context * ctx,NDBT_Step * step)939 int runSystemRestart7(NDBT_Context* ctx, NDBT_Step* step){
940   Ndb* pNdb = GETNDB(step);
941   int result = NDBT_OK;
942   Uint32 loops = ctx->getNumLoops();
943   int records = ctx->getNumRecords();
944   NdbRestarter restarter;
945   Uint32 i = 1;
946 
947   const Uint32 nodeCount = restarter.getNumDbNodes();
948   if(nodeCount < 2){
949     g_info << "SR7 - Needs atleast 2 nodes to test" << endl;
950     return NDBT_OK;
951   }
952 
953   Vector<int> nodeIds;
954   for(i = 0; i<nodeCount; i++)
955     nodeIds.push_back(restarter.getDbNodeId(i));
956 
957   int a_nodeIds[64];
958   if(nodeCount > 64)
959     abort();
960 
961   Uint32 currentRestartNodeIndex = 1;
962   UtilTransactions utilTrans(*ctx->getTab());
963   HugoTransactions hugoTrans(*ctx->getTab());
964 
965   while(i<=loops && result != NDBT_FAILED){
966 
967     g_info << "Loop " << i << "/"<< loops <<" started" << endl;
968     /**
969      * 1. Load data
970      * 2. Restart all node -nostart
971      * 3. Start all but one node
972      * 4. Wait for startphase >= 2
973      * 5. Start last node
974      * 6. Verify records
975      */
976     g_info << "Loading records..." << endl;
977     hugoTrans.loadTable(pNdb, records);
978 
979     CHECK(restarter.restartAll(false, true, false) == 0);
980 
981     int nodeId = nodeIds[currentRestartNodeIndex];
982     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
983 
984     Uint32 j = 0;
985     for(Uint32 k = 0; k<nodeCount; k++){
986       if(nodeIds[k] != nodeId){
987 	a_nodeIds[j++] = nodeIds[k];
988       }
989     }
990 
991     CHECK(restarter.startNodes(a_nodeIds, nodeCount - 1) == 0);
992     CHECK(restarter.waitNodesStarted(a_nodeIds, nodeCount - 1, 120) == 0);
993     CHECK(pNdb->waitUntilReady(5) == 0);
994     int count = records - 1;
995     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
996     CHECK(count == records);
997 
998     CHECK(restarter.startNodes(&nodeId, 1) == 0);
999     CHECK(restarter.waitNodesStarted(&nodeId, 1, 120) == 0);
1000 
1001     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
1002     CHECK(count == records);
1003     CHECK(utilTrans.clearTable(pNdb) == 0);
1004 
1005     i++;
1006   }
1007 
1008   g_info << "runSystemRestart7 finished" << endl;
1009 
1010   return result;
1011 }
1012 
runSystemRestart8(NDBT_Context * ctx,NDBT_Step * step)1013 int runSystemRestart8(NDBT_Context* ctx, NDBT_Step* step){
1014   Ndb* pNdb = GETNDB(step);
1015   int result = NDBT_OK;
1016   int timeout = 300;
1017   Uint32 loops = ctx->getNumLoops();
1018   int records = ctx->getNumRecords();
1019   NdbRestarter restarter;
1020   Uint32 i = 1;
1021 
1022   const Uint32 nodeCount = restarter.getNumDbNodes();
1023   if(nodeCount < 2){
1024     g_info << "SR8 - Needs atleast 2 nodes to test" << endl;
1025     return NDBT_OK;
1026   }
1027 
1028   Vector<int> nodeIds;
1029   for(i = 0; i<nodeCount; i++)
1030     nodeIds.push_back(restarter.getDbNodeId(i));
1031 
1032   int a_nodeIds[64];
1033   if(nodeCount > 64)
1034     abort();
1035 
1036   Uint32 currentRestartNodeIndex = 1;
1037   UtilTransactions utilTrans(*ctx->getTab());
1038   HugoTransactions hugoTrans(*ctx->getTab());
1039 
1040   while(i<=loops && result != NDBT_FAILED){
1041 
1042     g_info << "Loop " << i << "/"<< loops <<" started" << endl;
1043     /**
1044      * 1. Load data
1045      * 2. Restart all node -nostart
1046      * 3. Start all but one node
1047      * 4. Verify records
1048      * 5. Start last node
1049      * 6. Verify records
1050      */
1051     g_info << "Loading records..." << endl;
1052     hugoTrans.loadTable(pNdb, records);
1053 
1054     CHECK(restarter.restartAll(false, true, false) == 0);
1055 
1056     int nodeId = nodeIds[currentRestartNodeIndex];
1057     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
1058 
1059     Uint32 j = 0;
1060     for(Uint32 k = 0; k<nodeCount; k++){
1061       if(nodeIds[k] != nodeId){
1062 	a_nodeIds[j++] = nodeIds[k];
1063       }
1064     }
1065 
1066     CHECK(restarter.startNodes(a_nodeIds, nodeCount-1) == 0);
1067     CHECK(restarter.waitNodesStartPhase(a_nodeIds, nodeCount-1, 3, 120) == 0);
1068     CHECK(restarter.startNodes(&nodeId, 1) == 0);
1069     CHECK(restarter.waitClusterStarted(timeout) == 0);
1070     CHECK(pNdb->waitUntilReady() == 0);
1071 
1072     int count = records - 1;
1073     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
1074     CHECK(count == records);
1075     CHECK(utilTrans.clearTable(pNdb) == 0);
1076     i++;
1077   }
1078 
1079   g_info << "runSystemRestart8 finished" << endl;
1080 
1081   return result;
1082 }
1083 
runSystemRestart9(NDBT_Context * ctx,NDBT_Step * step)1084 int runSystemRestart9(NDBT_Context* ctx, NDBT_Step* step){
1085   Ndb* pNdb = GETNDB(step);
1086   int result = NDBT_OK;
1087   int timeout = 300;
1088   NdbRestarter restarter;
1089   Uint32 i = 1;
1090 
1091   UtilTransactions utilTrans(*ctx->getTab());
1092   HugoTransactions hugoTrans(*ctx->getTab());
1093 
1094   int args[] = { DumpStateOrd::DihMaxTimeBetweenLCP };
1095   int dump[] = { DumpStateOrd::DihStartLcpImmediately };
1096 
1097   do {
1098     CHECK(restarter.dumpStateAllNodes(args, 1) == 0);
1099 
1100     HugoOperations ops(* ctx->getTab());
1101     CHECK(ops.startTransaction(pNdb) == 0);
1102     for(i = 0; i<10; i++){
1103       CHECK(ops.pkInsertRecord(pNdb, i, 1, 1) == 0);
1104       CHECK(ops.execute_NoCommit(pNdb) == 0);
1105     }
1106     for(i = 0; i<10; i++){
1107       CHECK(ops.pkUpdateRecord(pNdb, i, 1) == 0);
1108       CHECK(ops.execute_NoCommit(pNdb) == 0);
1109     }
1110     NdbSleep_SecSleep(10);
1111     CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
1112     NdbSleep_SecSleep(10);
1113     CHECK(ops.execute_Commit(pNdb) == 0);
1114 
1115     CHECK(restarter.restartAll() == 0);
1116     CHECK(restarter.waitClusterStarted(timeout) == 0);
1117     CHECK(pNdb->waitUntilReady(timeout) == 0);
1118     ops.closeTransaction(pNdb);
1119   } while(0);
1120 
1121   g_info << "runSystemRestart9 finished" << endl;
1122 
1123   return result;
1124 }
1125 
runSystemRestart10(NDBT_Context * ctx,NDBT_Step * step)1126 int runSystemRestart10(NDBT_Context* ctx, NDBT_Step* step)
1127 {
1128   Ndb* pNdb = GETNDB(step);
1129   int result = NDBT_OK;
1130   //Uint32 loops = ctx->getNumLoops();
1131   Uint32 loops = 3;
1132   int records = ctx->getNumRecords();
1133   NdbRestarter restarter;
1134   Uint32 i = 1;
1135 
1136   const Uint32 nodeCount = restarter.getNumDbNodes();
1137   if(nodeCount < 4){
1138     g_info << "SR10 - Needs atleast 4 nodes to test" << endl;
1139     return NDBT_OK;
1140   }
1141 
1142   Vector<int> nodeIds;
1143   for(i = 0; i<nodeCount; i++)
1144     nodeIds.push_back(restarter.getDbNodeId(i));
1145 
1146   int a_nodeIds[64];
1147   if(nodeCount > 64)
1148     abort();
1149 
1150   UtilTransactions utilTrans(*ctx->getTab());
1151   HugoTransactions hugoTrans(*ctx->getTab());
1152 
1153   i = 1;
1154   while(i < loops && result != NDBT_FAILED){
1155 
1156     g_info << "Loop " << i << "/"<< loops <<" started" << endl;
1157     /**
1158      * 1. Load data
1159      * 2. Stop one node X (restart -nostart)
1160      * 3. Wait 10 seconds to ensure some GCPs are executed.
1161      * 4. Stop the rest of the nodes
1162      * 5. Start all nodes, but insert an error into the 2nd
1163      *    node to prevent it from passing phase 3 for 10
1164      *    seconds. The cluster should wait for these 10
1165      *    seconds, it cannot proceed at this point without
1166      *    it. If it tries to start without it, there will
1167      *    be a crash of the system restart.
1168      * 6. Verify records
1169      */
1170 
1171     g_info << "Loading records..." << endl;
1172     hugoTrans.loadTable(pNdb, records);
1173 
1174     Uint32 j = 0;
1175     for(Uint32 k = 0; k<nodeCount; k++)
1176     {
1177       a_nodeIds[j++] = nodeIds[k];
1178     }
1179 
1180     g_info << "Stop 2nd last node" << endl;
1181     CHECK(restarter.restartOneDbNode(a_nodeIds[nodeCount - 2],
1182 				     false,
1183 				     true,
1184 				     false) == 0);
1185 
1186     NdbSleep_SecSleep(10);
1187     g_info << "Stop rest of the nodes" << endl;
1188     CHECK(restarter.restartAll(false, true, false) == 0);
1189 
1190     int nodeId = a_nodeIds[nodeCount - 1];
1191 
1192     if (i == 0)
1193     {
1194       g_info << "Inject Error 1021 into last node to stop it in phase 1" << endl;
1195       CHECK(restarter.insertErrorInNode(nodeId, 1021) == 0);
1196     }
1197     else if (i == 1)
1198     {
1199       g_info << "Inject Error 1010 into last node to stop it in phase 4" << endl;
1200       CHECK(restarter.insertErrorInNode(nodeId, 1010) == 0);
1201     }
1202     if (i == 2)
1203     {
1204       g_info << "Start all nodes except the last node" << endl;
1205       CHECK(restarter.startNodes(a_nodeIds, nodeCount - 1) == 0);
1206       g_info << "Wait for those nodes to start, expect failure" << endl;
1207       CHECK(restarter.waitNodesStarted(a_nodeIds, nodeCount - 1, 30) != 0);
1208       g_info << "Start the last node" << endl;
1209       CHECK(restarter.startNodes(&nodeId, 1) == 0);
1210       g_info << "Wait for cluster to be started" << endl;
1211       CHECK(restarter.waitNodesStarted(a_nodeIds, nodeCount, 120) == 0);
1212     }
1213     else
1214     {
1215       CHECK(restarter.startNodes(a_nodeIds, nodeCount) == 0);
1216       g_info << "Wait for cluster to be started" << endl;
1217       CHECK(restarter.waitNodesStarted(a_nodeIds, nodeCount, 120) == 0);
1218     }
1219     g_info << "Perform consistency checks" << endl;
1220     CHECK(pNdb->waitUntilReady(5) == 0);
1221     int count = records - 1;
1222     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
1223     CHECK(count == records);
1224 
1225     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
1226     CHECK(count == records);
1227     CHECK(utilTrans.clearTable(pNdb) == 0);
1228 
1229     i++;
1230   }
1231 
1232   g_info << "runSystemRestart10 finished" << endl;
1233 
1234   return result;
1235 }
1236 
runBug18385(NDBT_Context * ctx,NDBT_Step * step)1237 int runBug18385(NDBT_Context* ctx, NDBT_Step* step){
1238   NdbRestarter restarter;
1239   const Uint32 nodeCount = restarter.getNumDbNodes();
1240   if(nodeCount < 2){
1241     g_info << "Bug18385 - Needs atleast 2 nodes to test" << endl;
1242     return NDBT_OK;
1243   }
1244 
1245   int node1 = restarter.getDbNodeId(rand() % nodeCount);
1246   int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
1247 
1248   if (node1 == -1 || node2 == -1)
1249     return NDBT_OK;
1250 
1251   int dump[] = { DumpStateOrd::DihSetTimeBetweenGcp, 300 };
1252 
1253   int result = NDBT_OK;
1254   do {
1255     CHECK(restarter.dumpStateAllNodes(dump, 2) == 0);
1256     CHECK(restarter.restartOneDbNode(node1, false, true, false) == 0);
1257     NdbSleep_SecSleep(3);
1258     CHECK(restarter.restartAll(false, true, false) == 0);
1259 
1260     Uint32 cnt = 0;
1261     int nodes[128];
1262     for(Uint32 i = 0; i<nodeCount; i++)
1263       if ((nodes[cnt] = restarter.getDbNodeId(i)) != node2)
1264 	cnt++;
1265 
1266     require(cnt == nodeCount - 1);
1267 
1268     CHECK(restarter.startNodes(nodes, cnt) == 0);
1269     CHECK(restarter.waitNodesStarted(nodes, cnt, 300) == 0);
1270 
1271     CHECK(restarter.insertErrorInNode(node2, 7170) == 0);
1272     CHECK(restarter.waitNodesNoStart(&node2, 1) == 0);
1273     CHECK(restarter.restartOneDbNode(node2, true, false, true) == 0);
1274     CHECK(restarter.waitNodesStarted(&node2, 1) == 0);
1275 
1276   } while(0);
1277 
1278   g_info << "Bug18385 finished" << endl;
1279 
1280   return result;
1281 }
1282 
runWaitStarted(NDBT_Context * ctx,NDBT_Step * step)1283 int runWaitStarted(NDBT_Context* ctx, NDBT_Step* step){
1284 
1285   NdbRestarter restarter;
1286   restarter.waitClusterStarted(300);
1287 
1288   NdbSleep_SecSleep(3);
1289   return NDBT_OK;
1290 }
1291 
runClearTable(NDBT_Context * ctx,NDBT_Step * step)1292 int runClearTable(NDBT_Context* ctx, NDBT_Step* step){
1293   int records = ctx->getNumRecords();
1294 
1295   Ndb* pNdb = GETNDB(step);
1296   if(pNdb->waitUntilReady(5) != 0){
1297     return NDBT_FAILED;
1298   }
1299 
1300   UtilTransactions utilTrans(*ctx->getTab());
1301   if (utilTrans.clearTable2(pNdb,  records) != 0){
1302     return NDBT_FAILED;
1303   }
1304   return NDBT_OK;
1305 }
1306 
1307 int
runBug21536(NDBT_Context * ctx,NDBT_Step * step)1308 runBug21536(NDBT_Context* ctx, NDBT_Step* step)
1309 {
1310   NdbRestarter restarter;
1311   const Uint32 nodeCount = restarter.getNumDbNodes();
1312   if(nodeCount != 2){
1313     g_info << "Bug21536 - 2 nodes to test" << endl;
1314     return NDBT_OK;
1315   }
1316 
1317   int node1 = restarter.getDbNodeId(rand() % nodeCount);
1318   int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
1319 
1320   if (node1 == -1 || node2 == -1)
1321     return NDBT_OK;
1322 
1323   int result = NDBT_OK;
1324   do {
1325     CHECK(restarter.restartOneDbNode(node1, false, true, true) == 0);
1326     CHECK(restarter.waitNodesNoStart(&node1, 1) == 0);
1327     CHECK(restarter.insertErrorInNode(node1, 1000) == 0);
1328     int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
1329     CHECK(restarter.dumpStateOneNode(node1, val2, 2) == 0);
1330     CHECK(restarter.startNodes(&node1, 1) == 0);
1331     restarter.waitNodesStartPhase(&node1, 1, 3, 120);
1332     CHECK(restarter.waitNodesNoStart(&node1, 1) == 0);
1333 
1334     CHECK(restarter.restartOneDbNode(node2, true, true, true) == 0);
1335     CHECK(restarter.waitNodesNoStart(&node2, 1) == 0);
1336     CHECK(restarter.startNodes(&node1, 1) == 0);
1337     CHECK(restarter.waitNodesStarted(&node1, 1) == 0);
1338     CHECK(restarter.startNodes(&node2, 1) == 0);
1339     CHECK(restarter.waitClusterStarted() == 0);
1340 
1341   } while(0);
1342 
1343   g_info << "Bug21536 finished" << endl;
1344 
1345   return result;
1346 }
1347 
1348 int
runBug24664(NDBT_Context * ctx,NDBT_Step * step)1349 runBug24664(NDBT_Context* ctx, NDBT_Step* step)
1350 {
1351   int result = NDBT_OK;
1352   NdbRestarter restarter;
1353   Ndb* pNdb = GETNDB(step);
1354 
1355   int records = ctx->getNumRecords();
1356   UtilTransactions utilTrans(*ctx->getTab());
1357   HugoTransactions hugoTrans(*ctx->getTab());
1358 
1359   int args[] = { DumpStateOrd::DihMaxTimeBetweenLCP };
1360   int dump[] = { DumpStateOrd::DihStartLcpImmediately };
1361 
1362   restarter.getNumDbNodes();
1363   int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
1364   NdbLogEventHandle handle =
1365     ndb_mgm_create_logevent_handle(restarter.handle, filter);
1366 
1367   struct ndb_logevent event;
1368 
1369   do {
1370     CHECK(restarter.dumpStateAllNodes(args, 1) == 0);
1371     CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
1372     while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1373 	  event.type != NDB_LE_LocalCheckpointStarted);
1374     while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1375 	  event.type != NDB_LE_LocalCheckpointCompleted);
1376 
1377     if (hugoTrans.loadTable(GETNDB(step), records) != 0){
1378       return NDBT_FAILED;
1379     }
1380 
1381     restarter.insertErrorInAllNodes(10039); // Hang LCP
1382     CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
1383     while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1384 	  event.type != NDB_LE_LocalCheckpointStarted);
1385     NdbSleep_SecSleep(3);
1386     CHECK(utilTrans.clearTable(pNdb,  records) == 0);
1387     if (hugoTrans.loadTable(GETNDB(step), records) != 0){
1388       return NDBT_FAILED;
1389     }
1390 
1391     restarter.insertErrorInAllNodes(10040); // Resume LCP
1392     while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1393 	  event.type != NDB_LE_LocalCheckpointCompleted);
1394 
1395     while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1396 	  event.type != NDB_LE_GlobalCheckpointCompleted);
1397     while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1398 	  event.type != NDB_LE_GlobalCheckpointCompleted);
1399     restarter.restartAll(false, false, true);
1400     CHECK(restarter.waitClusterStarted() == 0);
1401   } while(false);
1402 
1403   return result;
1404 }
1405 
1406 int
runBug27434(NDBT_Context * ctx,NDBT_Step * step)1407 runBug27434(NDBT_Context* ctx, NDBT_Step* step)
1408 {
1409   int result = NDBT_OK;
1410   NdbRestarter restarter;
1411   const Uint32 nodeCount = restarter.getNumDbNodes();
1412 
1413   if (nodeCount < 2)
1414     return NDBT_OK;
1415 
1416   int args[] = { DumpStateOrd::DihMaxTimeBetweenLCP };
1417   int dump[] = { DumpStateOrd::DihStartLcpImmediately };
1418 
1419   int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
1420   NdbLogEventHandle handle =
1421     ndb_mgm_create_logevent_handle(restarter.handle, filter);
1422 
1423   struct ndb_logevent event;
1424 
1425   do {
1426     int node1 = restarter.getDbNodeId(rand() % nodeCount);
1427     CHECK(restarter.restartOneDbNode(node1, false, true, true) == 0);
1428     NdbSleep_SecSleep(3);
1429     CHECK(restarter.waitNodesNoStart(&node1, 1) == 0);
1430 
1431     CHECK(restarter.dumpStateAllNodes(args, 1) == 0);
1432 
1433     for (Uint32 i = 0; i<3; i++)
1434     {
1435       CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
1436       while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1437 	    event.type != NDB_LE_LocalCheckpointStarted);
1438       while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1439 	    event.type != NDB_LE_LocalCheckpointCompleted);
1440     }
1441 
1442     restarter.restartAll(false, true, true);
1443     NdbSleep_SecSleep(3);
1444     CHECK(restarter.waitClusterNoStart() == 0);
1445     restarter.insertErrorInNode(node1, 5046);
1446     restarter.startAll();
1447     CHECK(restarter.waitClusterStarted() == 0);
1448   } while(false);
1449 
1450   return result;
1451 }
1452 
1453 int
runBug29167(NDBT_Context * ctx,NDBT_Step * step)1454 runBug29167(NDBT_Context* ctx, NDBT_Step* step)
1455 {
1456   int result = NDBT_OK;
1457   NdbRestarter restarter;
1458   const Uint32 nodeCount = restarter.getNumDbNodes();
1459 
1460   if (nodeCount < 4)
1461     return NDBT_OK;
1462 
1463   struct ndb_logevent event;
1464   int master = restarter.getMasterNodeId();
1465   do {
1466     int node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
1467     int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
1468 
1469     ndbout_c("node1: %u node2: %u", node1, node2);
1470 
1471     int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
1472     restarter.dumpStateAllNodes(val2, 2);
1473     int dump[] = { DumpStateOrd::DihSetTimeBetweenGcp, 30000 };
1474     restarter.dumpStateAllNodes(dump, 2);
1475 
1476     int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
1477     NdbLogEventHandle handle =
1478       ndb_mgm_create_logevent_handle(restarter.handle, filter);
1479 
1480     while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
1481           event.type != NDB_LE_GlobalCheckpointCompleted);
1482 
1483     ndb_mgm_destroy_logevent_handle(&handle);
1484 
1485     CHECK(restarter.insertErrorInAllNodes(932) == 0);
1486 
1487     CHECK(restarter.insertErrorInNode(node1, 7183) == 0);
1488     CHECK(restarter.insertErrorInNode(node2, 7183) == 0);
1489 
1490     CHECK(restarter.waitClusterNoStart() == 0);
1491     restarter.startAll();
1492     CHECK(restarter.waitClusterStarted() == 0);
1493   } while(false);
1494 
1495   return result;
1496 }
1497 int
runBug28770(NDBT_Context * ctx,NDBT_Step * step)1498 runBug28770(NDBT_Context* ctx, NDBT_Step* step) {
1499   Ndb* pNdb = GETNDB(step);
1500   NdbRestarter restarter;
1501   int result = NDBT_OK;
1502   int count = 0;
1503   Uint32 i = 0;
1504   Uint32 loops = ctx->getNumLoops();
1505   int records = ctx->getNumRecords();
1506   UtilTransactions utilTrans(*ctx->getTab());
1507   HugoTransactions hugoTrans(*ctx->getTab());
1508 
1509   g_info << "Loading records..." << endl;  hugoTrans.loadTable(pNdb,
1510  records);
1511 
1512 
1513   while(i<=loops && result != NDBT_FAILED)
1514   {
1515     g_info << "Loop " << i << "/"<< loops <<" started" << endl;
1516     if (i == 0)
1517     {
1518       CHECK(restarter.restartAll(false, true, false) == 0); // graceful
1519     }
1520     else
1521     {
1522       CHECK(restarter.restartAll(false, true, true) == 0); // abort
1523     }
1524     CHECK(restarter.waitClusterNoStart() == 0);
1525     restarter.insertErrorInAllNodes(6024);
1526     CHECK(restarter.startAll()== 0);
1527     CHECK(restarter.waitClusterStarted() == 0);
1528     CHECK(pNdb->waitUntilReady() == 0);
1529     CHECK(utilTrans.selectCount(pNdb, 64, &count) == 0);
1530     CHECK(count == records);
1531     i++;
1532   }
1533   ndbout << " runBug28770 finished" << endl;
1534   return result;
1535 }
1536 
1537 int
runStopper(NDBT_Context * ctx,NDBT_Step * step)1538 runStopper(NDBT_Context* ctx, NDBT_Step* step)
1539 {
1540   NdbRestarter restarter;
1541   Uint32 stop = 0;
1542 loop:
1543   while (!ctx->isTestStopped() &&
1544 	 ((stop = ctx->getProperty("StopAbort", Uint32(0))) == 0))
1545   {
1546     NdbSleep_MilliSleep(30);
1547   }
1548 
1549   if (ctx->isTestStopped())
1550   {
1551     return NDBT_OK;
1552   }
1553 
1554   ctx->setProperty("StopAbort", Uint32(0));
1555 
1556   ndbout << "Killing in " << stop << "ms..." << flush;
1557   NdbSleep_MilliSleep(stop);
1558   restarter.restartAll(false, true, true);
1559   goto loop;
1560 }
1561 
runSR_DD_1(NDBT_Context * ctx,NDBT_Step * step)1562 int runSR_DD_1(NDBT_Context* ctx, NDBT_Step* step)
1563 {
1564   Ndb* pNdb = GETNDB(step);
1565   int result = NDBT_OK;
1566   Uint32 loops = ctx->getNumLoops();
1567   NdbRestarter restarter;
1568   NdbBackup backup;
1569   bool lcploop = ctx->getProperty("LCP", (unsigned)0);
1570   bool all = ctx->getProperty("ALL", (unsigned)0);
1571 
1572   Uint32 i = 1;
1573 
1574   int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
1575   int lcp = DumpStateOrd::DihMinTimeBetweenLCP;
1576 
1577   int startFrom = 0;
1578 
1579   HugoTransactions hugoTrans(*ctx->getTab());
1580   while(i<=loops && result != NDBT_FAILED)
1581   {
1582     if (i > 0 && ctx->closeToTimeout(30))
1583       break;
1584 
1585     if (lcploop)
1586     {
1587       CHECK(restarter.dumpStateAllNodes(&lcp, 1) == 0);
1588     }
1589 
1590     int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
1591     //CHECK(restarter.dumpStateAllNodes(&val, 1) == 0);
1592 
1593     ndbout << "Loop " << i << "/"<< loops <<" started" << endl;
1594     ndbout << "Loading records..." << startFrom << endl;
1595     CHECK(hugoTrans.loadTable(pNdb, startFrom) == 0);
1596 
1597     if (!all)
1598     {
1599       ndbout << "Making " << nodeId << " crash" << endl;
1600       int kill[] = { 9999, 1000, 3000 };
1601       CHECK(restarter.dumpStateOneNode(nodeId, val, 2) == 0);
1602       CHECK(restarter.dumpStateOneNode(nodeId, kill, 3) == 0);
1603     }
1604     else
1605     {
1606       ndbout << "Crashing cluster" << endl;
1607       ctx->setProperty("StopAbort", 1000 + rand() % (3000 - 1000));
1608     }
1609     Uint64 end = NdbTick_CurrentMillisecond() + 4000;
1610     Uint32 row = startFrom;
1611     do {
1612       ndbout << "Loading from " << row << " to " << row + 1000 << endl;
1613       if (hugoTrans.loadTableStartFrom(pNdb, row, 1000) != 0)
1614 	break;
1615       row += 1000;
1616     } while (NdbTick_CurrentMillisecond() < end);
1617 
1618     if (!all)
1619     {
1620       ndbout << "Waiting for " << nodeId << " to restart" << endl;
1621       CHECK(restarter.waitNodesNoStart(&nodeId, 1) == 0);
1622       ndbout << "Restarting cluster" << endl;
1623       CHECK(restarter.restartAll(false, true, true) == 0);
1624     }
1625     else
1626     {
1627       ndbout << "Waiting for cluster to restart" << endl;
1628     }
1629     CHECK(restarter.waitClusterNoStart() == 0);
1630     CHECK(restarter.startAll() == 0);
1631     CHECK(restarter.waitClusterStarted() == 0);
1632     CHECK(pNdb->waitUntilReady() == 0);
1633 
1634     ndbout << "Starting backup..." << flush;
1635     CHECK(backup.start() == 0);
1636     ndbout << "done" << endl;
1637 
1638     int cnt = 0;
1639     CHECK(hugoTrans.selectCount(pNdb, 0, &cnt) == 0);
1640     ndbout << "Found " << cnt << " records..." << endl;
1641     ndbout << "Updating..." << endl;
1642     CHECK(hugoTrans.scanUpdateRecords(pNdb,
1643                                       NdbScanOperation::SF_TupScan, cnt) == 0
1644           || hugoTrans.getRetryMaxReached());
1645     ndbout << "Clearing..." << endl;
1646     CHECK(hugoTrans.clearTable(pNdb,
1647                                NdbScanOperation::SF_TupScan, cnt) == 0);
1648 
1649     if (cnt > startFrom)
1650     {
1651       startFrom = cnt;
1652     }
1653     startFrom += 1000;
1654     i++;
1655   }
1656 
1657   ndbout << "runSR_DD_1 finished" << endl;
1658   ctx->stopTest();
1659   return result;
1660 }
1661 
runSR_DD_2(NDBT_Context * ctx,NDBT_Step * step)1662 int runSR_DD_2(NDBT_Context* ctx, NDBT_Step* step)
1663 {
1664   Ndb* pNdb = GETNDB(step);
1665   int result = NDBT_OK;
1666   Uint32 loops = ctx->getNumLoops();
1667   Uint32 rows = ctx->getNumRecords();
1668   NdbRestarter restarter;
1669   NdbBackup backup;
1670   bool lcploop = ctx->getProperty("LCP", (unsigned)0);
1671   bool all = ctx->getProperty("ALL", (unsigned)0);
1672   int error = (int)ctx->getProperty("ERROR", (unsigned)0);
1673   rows = ctx->getProperty("ROWS", rows);
1674 
1675   Uint32 i = 1;
1676 
1677   int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
1678   int lcp = DumpStateOrd::DihMinTimeBetweenLCP;
1679 
1680   if (error)
1681   {
1682     restarter.insertErrorInAllNodes(error);
1683   }
1684 
1685   HugoTransactions hugoTrans(*ctx->getTab());
1686   while(i<=loops && result != NDBT_FAILED)
1687   {
1688     if (i > 0 && ctx->closeToTimeout(30))
1689       break;
1690 
1691     if (lcploop)
1692     {
1693       CHECK(restarter.dumpStateAllNodes(&lcp, 1) == 0);
1694     }
1695 
1696     int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
1697 
1698     if (!all)
1699     {
1700       ndbout << "Making " << nodeId << " crash" << endl;
1701       int kill[] = { 9999, 3000, 10000 };
1702       CHECK(restarter.dumpStateOneNode(nodeId, val, 2) == 0);
1703       CHECK(restarter.dumpStateOneNode(nodeId, kill, 3) == 0);
1704     }
1705     else
1706     {
1707       ndbout << "Crashing cluster" << endl;
1708       ctx->setProperty("StopAbort", 3000 + rand() % (10000 - 3000));
1709     }
1710 
1711     Uint64 end = NdbTick_CurrentMillisecond() + 11000;
1712     do {
1713       if (hugoTrans.loadTable(pNdb, rows) != 0)
1714 	break;
1715 
1716       if (hugoTrans.clearTable(pNdb, NdbScanOperation::SF_TupScan, rows) != 0)
1717 	break;
1718     } while (NdbTick_CurrentMillisecond() < end);
1719 
1720     if (!all)
1721     {
1722       ndbout << "Waiting for " << nodeId << " to restart" << endl;
1723       CHECK(restarter.waitNodesNoStart(&nodeId, 1) == 0);
1724       ndbout << "Restarting cluster" << endl;
1725       CHECK(restarter.restartAll(false, true, true) == 0);
1726     }
1727     else
1728     {
1729       ndbout << "Waiting for cluster to restart" << endl;
1730     }
1731 
1732     CHECK(restarter.waitClusterNoStart() == 0);
1733     CHECK(restarter.startAll() == 0);
1734     CHECK(restarter.waitClusterStarted() == 0);
1735     CHECK(pNdb->waitUntilReady() == 0);
1736 
1737     if (error)
1738     {
1739       restarter.insertErrorInAllNodes(error);
1740     }
1741 
1742     ndbout << "Starting backup..." << flush;
1743     CHECK(backup.start() == 0);
1744     ndbout << "done" << endl;
1745 
1746     int cnt = 0;
1747     CHECK(hugoTrans.selectCount(pNdb, 0, &cnt) == 0);
1748     ndbout << "Found " << cnt << " records..." << endl;
1749     ndbout << "Updating..." << endl;
1750     CHECK(hugoTrans.scanUpdateRecords(pNdb,
1751                                       NdbScanOperation::SF_TupScan, cnt) == 0
1752           || hugoTrans.getRetryMaxReached());
1753     ndbout << "Clearing..." << endl;
1754     CHECK(hugoTrans.clearTable(pNdb,
1755                                NdbScanOperation::SF_TupScan, cnt) == 0);
1756     i++;
1757   }
1758 
1759   if (error)
1760   {
1761     restarter.insertErrorInAllNodes(0);
1762   }
1763 
1764   ndbout << "runSR_DD_2 finished" << endl;
1765   ctx->stopTest();
1766   return result;
1767 }
1768 
runSR_DD_3(NDBT_Context * ctx,NDBT_Step * step)1769 int runSR_DD_3(NDBT_Context* ctx, NDBT_Step* step)
1770 {
1771   Ndb* pNdb = GETNDB(step);
1772   int result = NDBT_OK;
1773   Uint32 loops = ctx->getNumLoops();
1774   Uint32 rows = ctx->getNumRecords();
1775   NdbRestarter restarter;
1776   NdbBackup backup;
1777   bool lcploop = ctx->getProperty("LCP", (unsigned)0);
1778   bool all = ctx->getProperty("ALL", (unsigned)0);
1779   int error = (int)ctx->getProperty("ERROR", (unsigned)0);
1780   rows = ctx->getProperty("ROWS", rows);
1781 
1782   Uint32 i = 1;
1783 
1784   int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
1785   int lcp = DumpStateOrd::DihMinTimeBetweenLCP;
1786 
1787   if (error)
1788   {
1789     restarter.insertErrorInAllNodes(error);
1790   }
1791 
1792   HugoTransactions hugoTrans(*ctx->getTab());
1793   while(i<=loops && result != NDBT_FAILED)
1794   {
1795     if (i > 0 && ctx->closeToTimeout(30))
1796       break;
1797 
1798     if (lcploop)
1799     {
1800       CHECK(restarter.dumpStateAllNodes(&lcp, 1) == 0);
1801     }
1802 
1803     int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
1804 
1805     if (hugoTrans.loadTable(pNdb, rows) != 0)
1806     {
1807       return NDBT_FAILED;
1808     }
1809 
1810     if (!all)
1811     {
1812       ndbout << "Making " << nodeId << " crash" << endl;
1813       int kill[] = { 9999, 3000, 10000 };
1814       CHECK(restarter.dumpStateOneNode(nodeId, val, 2) == 0);
1815       CHECK(restarter.dumpStateOneNode(nodeId, kill, 3) == 0);
1816     }
1817     else
1818     {
1819       ndbout << "Crashing cluster" << endl;
1820       ctx->setProperty("StopAbort", 3000 + rand() % (10000 - 3000));
1821     }
1822 
1823     int deletedrows[100];
1824     Uint64 end = NdbTick_CurrentMillisecond() + 13000;
1825     do {
1826       Uint32 cnt = 0;
1827       for (; cnt<NDB_ARRAY_SIZE(deletedrows); cnt++)
1828       {
1829         deletedrows[cnt] = rand() % rows;
1830         if (hugoTrans.startTransaction(pNdb))
1831           break;
1832         if (hugoTrans.pkDeleteRecord(pNdb, deletedrows[cnt]))
1833           break;
1834         if (hugoTrans.execute_Commit(pNdb))
1835           break;
1836         hugoTrans.closeTransaction(pNdb);
1837       }
1838       if (hugoTrans.getTransaction() != 0)
1839         hugoTrans.closeTransaction(pNdb);
1840 
1841       if (hugoTrans.scanUpdateRecords(pNdb, NdbScanOperation::SF_TupScan,0)!=0)
1842 	break;
1843 
1844       for (Uint32 n = 0; n<cnt; n++)
1845       {
1846         if (hugoTrans.startTransaction(pNdb))
1847           break;
1848         if (hugoTrans.pkInsertRecord(pNdb, deletedrows[n], 1, rand()))
1849           break;
1850         if (hugoTrans.execute_Commit(pNdb))
1851           break;
1852         hugoTrans.closeTransaction(pNdb);
1853       }
1854       if (hugoTrans.getTransaction() != 0)
1855         hugoTrans.closeTransaction(pNdb);
1856 
1857       if (hugoTrans.scanUpdateRecords(pNdb, NdbScanOperation::SF_TupScan,0)!=0
1858           && !hugoTrans.getRetryMaxReached())
1859 	break;
1860     } while (NdbTick_CurrentMillisecond() < end);
1861 
1862     if (!all)
1863     {
1864       ndbout << "Waiting for " << nodeId << " to restart" << endl;
1865       CHECK(restarter.waitNodesNoStart(&nodeId, 1) == 0);
1866       ndbout << "Restarting cluster" << endl;
1867       CHECK(restarter.restartAll(false, true, true) == 0);
1868     }
1869     else
1870     {
1871       ndbout << "Waiting for cluster to restart" << endl;
1872     }
1873 
1874     CHECK(restarter.waitClusterNoStart() == 0);
1875     CHECK(restarter.startAll() == 0);
1876     CHECK(restarter.waitClusterStarted() == 0);
1877     if (error)
1878     {
1879       restarter.insertErrorInAllNodes(error);
1880     }
1881 
1882     ndbout << "Starting backup..." << flush;
1883     CHECK(backup.start() == 0);
1884     ndbout << "done" << endl;
1885 
1886     int cnt = 0;
1887     CHECK(hugoTrans.selectCount(pNdb, 0, &cnt) == 0);
1888     ndbout << "Found " << cnt << " records..." << endl;
1889     ndbout << "Updating..." << endl;
1890     CHECK(hugoTrans.scanUpdateRecords(pNdb,
1891                                       NdbScanOperation::SF_TupScan, cnt) == 0);
1892     ndbout << "Clearing..." << endl;
1893     CHECK(hugoTrans.clearTable(pNdb,
1894                                NdbScanOperation::SF_TupScan, cnt) == 0);
1895     i++;
1896   }
1897 
1898   if (error)
1899   {
1900     restarter.insertErrorInAllNodes(0);
1901   }
1902 
1903   ndbout << "runSR_DD_3 finished" << endl;
1904   ctx->stopTest();
1905   return result;
1906 }
1907 
runBug22696(NDBT_Context * ctx,NDBT_Step * step)1908 int runBug22696(NDBT_Context* ctx, NDBT_Step* step)
1909 {
1910   Ndb* pNdb = GETNDB(step);
1911   int result = NDBT_OK;
1912   Uint32 loops = ctx->getNumLoops();
1913   Uint32 rows = ctx->getNumRecords();
1914   NdbRestarter restarter;
1915   HugoTransactions hugoTrans(*ctx->getTab());
1916 
1917   Uint32 i = 0;
1918   while(i<=loops && result != NDBT_FAILED)
1919   {
1920     ndbout_c("loop %u", i);
1921     for (Uint32 j = 0; j<10 && result != NDBT_FAILED; j++)
1922       CHECK(hugoTrans.scanUpdateRecords(pNdb, rows) == 0);
1923 
1924     CHECK(restarter.restartAll(false, true, i > 0 ? true : false) == 0);
1925     CHECK(restarter.waitClusterNoStart() == 0);
1926     CHECK(restarter.insertErrorInAllNodes(7072) == 0);
1927     CHECK(restarter.startAll() == 0);
1928     CHECK(restarter.waitClusterStarted() == 0);
1929     CHECK(pNdb->waitUntilReady() == 0);
1930 
1931     i++;
1932     if (i < loops)
1933     {
1934       NdbSleep_SecSleep(5); // Wait for a few gcp
1935     }
1936   }
1937 
1938   ctx->stopTest();
1939   return result;
1940 }
1941 
1942 int
runCreateAllTables(NDBT_Context * ctx,NDBT_Step * step)1943 runCreateAllTables(NDBT_Context* ctx, NDBT_Step* step)
1944 {
1945   if (NDBT_Tables::createAllTables(GETNDB(step), false, true))
1946     return NDBT_FAILED;
1947   return NDBT_OK;
1948 }
1949 
1950 int
runBasic(NDBT_Context * ctx,NDBT_Step * step)1951 runBasic(NDBT_Context* ctx, NDBT_Step* step)
1952 {
1953   Ndb* pNdb = GETNDB(step);
1954   NdbDictionary::Dictionary * pDict = pNdb->getDictionary();
1955   int loops = ctx->getNumLoops();
1956   int records = ctx->getNumRecords();
1957   NdbRestarter restarter;
1958   int result = NDBT_OK;
1959 
1960   for (int l = 0; l<loops; l++)
1961   {
1962     for (int i = 0; i<NDBT_Tables::getNumTables(); i++)
1963     {
1964       const NdbDictionary::Table* tab =
1965         pDict->getTable(NDBT_Tables::getTable(i)->getName());
1966       HugoTransactions trans(* tab);
1967       switch(l % 3){
1968       case 0:
1969         trans.loadTable(pNdb, records);
1970         trans.scanUpdateRecords(pNdb, records);
1971         break;
1972       case 1:
1973         trans.scanUpdateRecords(pNdb, records);
1974         trans.clearTable(pNdb, records/2);
1975         trans.loadTable(pNdb, records/2);
1976         break;
1977       case 2:
1978         trans.clearTable(pNdb, records/2);
1979         trans.loadTable(pNdb, records/2);
1980         trans.clearTable(pNdb, records/2);
1981         break;
1982       }
1983     }
1984 
1985     ndbout << "Restarting cluster..." << endl;
1986     CHECK(restarter.restartAll(false, true, false) == 0);
1987     CHECK(restarter.waitClusterNoStart() == 0);
1988     CHECK(restarter.startAll() == 0);
1989     CHECK(restarter.waitClusterStarted() == 0);
1990     CHECK(pNdb->waitUntilReady() == 0);
1991 
1992     for (int i = 0; i<NDBT_Tables::getNumTables(); i++)
1993     {
1994       const NdbDictionary::Table* tab =
1995         pDict->getTable(NDBT_Tables::getTable(i)->getName());
1996       HugoTransactions trans(* tab);
1997       trans.scanUpdateRecords(pNdb, records);
1998     }
1999   }
2000 
2001   return result;
2002 }
2003 
2004 int
runDropAllTables(NDBT_Context * ctx,NDBT_Step * step)2005 runDropAllTables(NDBT_Context* ctx, NDBT_Step* step)
2006 {
2007   NDBT_Tables::dropAllTables(GETNDB(step));
2008   return NDBT_OK;
2009 }
2010 
2011 int
runTO(NDBT_Context * ctx,NDBT_Step * step)2012 runTO(NDBT_Context* ctx, NDBT_Step* step)
2013 {
2014   Ndb* pNdb = GETNDB(step);
2015   int result = NDBT_OK;
2016   Uint32 loops = ctx->getNumLoops();
2017   Uint32 rows = ctx->getNumRecords();
2018   NdbRestarter res;
2019   HugoTransactions hugoTrans(*ctx->getTab());
2020 
2021   if (res.getNumDbNodes() < 2)
2022     return NDBT_OK;
2023 
2024   Uint32 nodeGroups[256];
2025   Bitmask<256/32> nodeGroupMap;
2026   for (int j = 0; j<res.getNumDbNodes(); j++)
2027   {
2028     int node = res.getDbNodeId(j);
2029     nodeGroups[node] = res.getNodeGroup(node);
2030     nodeGroupMap.set(nodeGroups[node]);
2031   }
2032 
2033   struct ndb_logevent event;
2034   int val[] = { DumpStateOrd::DihMinTimeBetweenLCP, 0 };
2035 
2036   Uint32 i = 0;
2037   while(i<=loops && result != NDBT_FAILED)
2038   {
2039     if (i > 0 && ctx->closeToTimeout(35))
2040       break;
2041 
2042     CHECK(res.dumpStateAllNodes(val, 1) == 0);
2043 
2044     int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
2045     NdbLogEventHandle handle =
2046       ndb_mgm_create_logevent_handle(res.handle, filter);
2047 
2048     Bitmask<256/32> notstopped = nodeGroupMap;
2049     while(!notstopped.isclear())
2050     {
2051       int node;
2052       do {
2053         node = res.getDbNodeId(rand() % res.getNumDbNodes());
2054       } while (!notstopped.get(nodeGroups[node]));
2055 
2056       notstopped.clear(nodeGroups[node]);
2057       ndbout_c("stopping %u", node);
2058       CHECK(res.restartOneDbNode(node, false, true, true) == 0);
2059       CHECK(res.waitNodesNoStart(&node, 1) == 0);
2060       for (Uint32 j = 0; j<25; j++)
2061       {
2062         if (! (hugoTrans.scanUpdateRecords(pNdb, 0) == 0))
2063           break;
2064       }
2065       while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
2066             event.type != NDB_LE_LocalCheckpointCompleted);
2067     }
2068 
2069     while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
2070 	  event.type != NDB_LE_LocalCheckpointCompleted);
2071 
2072     Uint32 LCP = event.LocalCheckpointCompleted.lci;
2073     ndbout_c("LCP: %u", LCP);
2074 
2075     do
2076     {
2077       bzero(&event, sizeof(event));
2078       while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
2079             event.type != NDB_LE_LocalCheckpointCompleted)
2080         bzero(&event, sizeof(event));
2081 
2082       if (event.type == NDB_LE_LocalCheckpointCompleted &&
2083           event.LocalCheckpointCompleted.lci < LCP + 3)
2084       {
2085         hugoTrans.scanUpdateRecords(pNdb, 0);
2086       }
2087       else
2088       {
2089         break;
2090       }
2091     } while (true);
2092 
2093     ndbout_c("LCP: %u", event.LocalCheckpointCompleted.lci);
2094 
2095     CHECK(res.restartAll(false, true, true) == 0);
2096     CHECK(res.waitClusterNoStart() == 0);
2097     CHECK(res.startAll() == 0);
2098     Uint64 now = NdbTick_CurrentMillisecond();
2099     /**
2100      * running transaction while cluster is down...
2101      * causes *lots* of printouts...redirect to /dev/null
2102      * so that log files doe't get megabytes
2103      */
2104     NullOutputStream null;
2105     OutputStream * save[1];
2106     save[0] = g_err.m_out;
2107     g_err.m_out = &null;
2108     do
2109     {
2110       hugoTrans.scanUpdateRecords(pNdb, 0);
2111     } while (NdbTick_CurrentMillisecond() < (now + 30000));
2112     g_err.m_out = save[0];
2113     CHECK(res.waitClusterStarted() == 0);
2114     CHECK(pNdb->waitUntilReady() == 0);
2115 
2116     hugoTrans.clearTable(pNdb);
2117     hugoTrans.loadTable(pNdb, rows);
2118 
2119     CHECK(res.dumpStateAllNodes(val, 1) == 0);
2120 
2121     while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
2122           event.type != NDB_LE_LocalCheckpointCompleted);
2123 
2124     ndb_mgm_destroy_logevent_handle(&handle);
2125 
2126     i++;
2127   }
2128 
2129   res.dumpStateAllNodes(val, 2); // Reset LCP time
2130 
2131   ctx->stopTest();
2132   return result;
2133 }
2134 
runBug45154(NDBT_Context * ctx,NDBT_Step * step)2135 int runBug45154(NDBT_Context* ctx, NDBT_Step* step)
2136 {
2137   Ndb* pNdb = GETNDB(step);
2138   NdbDictionary::Dictionary * pDict = pNdb->getDictionary();
2139   int result = NDBT_OK;
2140   Uint32 loops = ctx->getNumLoops();
2141   Uint32 rows = ctx->getNumRecords();
2142   NdbRestarter restarter;
2143 
2144   restarter.getNumDbNodes();
2145   int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 };
2146   NdbLogEventHandle handle =
2147     ndb_mgm_create_logevent_handle(restarter.handle, filter);
2148 
2149   struct ndb_logevent event;
2150 
2151   Uint32 frag_data[128];
2152   bzero(frag_data, sizeof(frag_data));
2153 
2154   NdbDictionary::HashMap map;
2155   pDict->getDefaultHashMap(map, 2*restarter.getNumDbNodes());
2156   pDict->createHashMap(map);
2157 
2158   pDict->getDefaultHashMap(map, restarter.getNumDbNodes());
2159   pDict->createHashMap(map);
2160 
2161   for(Uint32 i = 0; i < loops && result != NDBT_FAILED; i++)
2162   {
2163     ndbout_c("loop %u", i);
2164 
2165     NdbDictionary::Table copy = *ctx->getTab();
2166     copy.setName("BUG_45154");
2167     copy.setFragmentType(NdbDictionary::Object::DistrKeyLin);
2168     copy.setFragmentCount(2 * restarter.getNumDbNodes());
2169     copy.setFragmentData(frag_data, 2*restarter.getNumDbNodes());
2170     pDict->dropTable("BUG_45154");
2171     int res = pDict->createTable(copy);
2172     if (res != 0)
2173     {
2174       ndbout << pDict->getNdbError() << endl;
2175       return NDBT_FAILED;
2176     }
2177     const NdbDictionary::Table* copyptr= pDict->getTable("BUG_45154");
2178 
2179     {
2180       HugoTransactions hugoTrans(*copyptr);
2181       hugoTrans.loadTable(pNdb, rows);
2182     }
2183 
2184     int dump[] = { DumpStateOrd::DihStartLcpImmediately };
2185     for (int l = 0; l<2; l++)
2186     {
2187       CHECK(restarter.dumpStateAllNodes(dump, 1) == 0);
2188       while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
2189             event.type != NDB_LE_LocalCheckpointStarted);
2190       while(ndb_logevent_get_next(handle, &event, 0) >= 0 &&
2191             event.type != NDB_LE_LocalCheckpointCompleted);
2192     }
2193 
2194     pDict->dropTable("BUG_45154");
2195     copy.setFragmentCount(restarter.getNumDbNodes());
2196     copy.setFragmentData(frag_data, restarter.getNumDbNodes());
2197     res = pDict->createTable(copy);
2198     if (res != 0)
2199     {
2200       ndbout << pDict->getNdbError() << endl;
2201       return NDBT_FAILED;
2202     }
2203     copyptr = pDict->getTable("BUG_45154");
2204 
2205     {
2206       HugoTransactions hugoTrans(*copyptr);
2207       hugoTrans.loadTable(pNdb, rows);
2208       for (Uint32 pp = 0; pp<3; pp++)
2209         hugoTrans.scanUpdateRecords(pNdb, rows);
2210     }
2211     restarter.restartAll(false, true, true);
2212     restarter.waitClusterNoStart();
2213     restarter.startAll();
2214     restarter.waitClusterStarted();
2215 
2216     pDict->dropTable("BUG_45154");
2217   }
2218 
2219   ctx->stopTest();
2220   return result;
2221 }
2222 
runBug46651(NDBT_Context * ctx,NDBT_Step * step)2223 int runBug46651(NDBT_Context* ctx, NDBT_Step* step)
2224 {
2225   Ndb* pNdb = GETNDB(step);
2226   NdbDictionary::Dictionary * pDict = pNdb->getDictionary();
2227   Uint32 rows = ctx->getNumRecords();
2228   NdbRestarter res;
2229 
2230   NdbDictionary::Table tab;
2231   tab.setName("BUG_46651");
2232 
2233   NdbDictionary::Column col;
2234   col.setName("ATTR1");
2235   col.setType(NdbDictionary::Column::Unsigned);
2236   col.setLength(1);
2237   col.setPrimaryKey(true);
2238   col.setNullable(false);
2239   col.setAutoIncrement(false);
2240   tab.addColumn(col);
2241   col.setName("ATTR2");
2242   col.setType(NdbDictionary::Column::Unsigned);
2243   col.setLength(1);
2244   col.setPrimaryKey(false);
2245   col.setNullable(false);
2246   tab.addColumn(col);
2247   col.setName("ATTR3");
2248   col.setType(NdbDictionary::Column::Unsigned);
2249   col.setLength(1);
2250   col.setPrimaryKey(false);
2251   col.setNullable(false);
2252   tab.addColumn(col);
2253   tab.setForceVarPart(true);
2254   pDict->dropTable(tab.getName());
2255   if (pDict->createTable(tab))
2256   {
2257     ndbout << pDict->getNdbError() << endl;
2258     return NDBT_FAILED;
2259   }
2260 
2261   const NdbDictionary::Table* pTab = pDict->getTable(tab.getName());
2262   if (pTab == 0)
2263   {
2264     ndbout << pDict->getNdbError() << endl;
2265     return NDBT_FAILED;
2266   }
2267 
2268   {
2269     HugoTransactions trans(* pTab);
2270     if (trans.loadTable(pNdb, rows) != 0)
2271     {
2272       return NDBT_FAILED;
2273     }
2274   }
2275 
2276   res.restartAll2(NdbRestarter::NRRF_NOSTART);
2277   if (res.waitClusterNoStart())
2278     return NDBT_FAILED;
2279   res.startAll();
2280   if (res.waitClusterStarted())
2281     return NDBT_FAILED;
2282 
2283   pNdb->waitUntilReady();
2284 
2285   NdbDictionary::Table newTab = *pTab;
2286   col.setName("ATTR4");
2287   col.setType(NdbDictionary::Column::Varbinary);
2288   col.setLength(25);
2289   col.setPrimaryKey(false);
2290   col.setNullable(true);
2291   col.setDynamic(true);
2292   newTab.addColumn(col);
2293 
2294   if (pDict->alterTable(*pTab, newTab))
2295   {
2296     ndbout << pDict->getNdbError() << endl;
2297     return NDBT_FAILED;
2298   }
2299 
2300   res.restartAll2(NdbRestarter::NRRF_NOSTART | NdbRestarter::NRRF_ABORT);
2301   if (res.waitClusterNoStart())
2302     return NDBT_FAILED;
2303   res.startAll();
2304   if (res.waitClusterStarted())
2305     return NDBT_FAILED;
2306 
2307   pNdb->waitUntilReady();
2308   pDict->dropTable(tab.getName());
2309 
2310   return NDBT_OK;
2311 }
2312 
2313 int
runBug46412(NDBT_Context * ctx,NDBT_Step * step)2314 runBug46412(NDBT_Context* ctx, NDBT_Step* step)
2315 {
2316   Uint32 loops = ctx->getNumLoops();
2317   NdbRestarter res;
2318   const Uint32 nodeCount = res.getNumDbNodes();
2319   if(nodeCount < 2)
2320   {
2321     return NDBT_OK;
2322   }
2323 
2324   for (Uint32 l = 0; l<loops; l++)
2325   {
2326 loop:
2327     printf("checking nodegroups of getNextMasterNodeId(): ");
2328     int nodes[256];
2329     bzero(nodes, sizeof(nodes));
2330     nodes[0] = res.getMasterNodeId();
2331     printf("%d ", nodes[0]);
2332     for (Uint32 i = 1; i<nodeCount; i++)
2333     {
2334       nodes[i] = res.getNextMasterNodeId(nodes[i-1]);
2335       printf("%d ", nodes[i]);
2336     }
2337     printf("\n");
2338 
2339     Bitmask<256/32> ng;
2340     int cnt = 0;
2341     int restartnodes[256];
2342 
2343     Uint32 limit = (nodeCount / 2);
2344     for (Uint32 i = 0; i<limit; i++)
2345     {
2346       int tmp = res.getNodeGroup(nodes[i]);
2347       printf("node %d ng: %d", nodes[i], tmp);
2348       if (ng.get(tmp))
2349       {
2350         restartnodes[cnt++] = nodes[i];
2351         ndbout_c(" COLLISION");
2352         limit++;
2353         if (limit > nodeCount)
2354           limit = nodeCount;
2355       }
2356       else
2357       {
2358         ng.set(tmp);
2359         ndbout_c(" OK");
2360       }
2361     }
2362 
2363     if (cnt)
2364     {
2365       printf("restarting nodes: ");
2366       for (int i = 0; i<cnt; i++)
2367         printf("%d ", restartnodes[i]);
2368       printf("\n");
2369       for (int i = 0; i<cnt; i++)
2370       {
2371         res.restartOneDbNode(restartnodes[i], false, true, true);
2372       }
2373       res.waitNodesNoStart(restartnodes, cnt);
2374       res.startNodes(restartnodes, cnt);
2375       if (res.waitClusterStarted())
2376         return NDBT_FAILED;
2377 
2378       goto loop;
2379     }
2380 
2381     int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2382     res.dumpStateAllNodes(val2, 2);
2383 
2384     Bitmask<256/32> mask;
2385     for (Uint32 i = 0; i<(nodeCount / 2); i++)
2386     {
2387       int node = nodes[(nodeCount / 2) - (i + 1)];
2388       mask.set(node);
2389       res.insertErrorInNode(node, 7218);
2390     }
2391 
2392     for (Uint32 i = 0; i<nodeCount; i++)
2393     {
2394       int node = nodes[i];
2395       if (mask.get(node))
2396         continue;
2397       res.insertErrorInNode(node, 7220);
2398     }
2399 
2400     int lcp = 7099;
2401     res.dumpStateAllNodes(&lcp, 1);
2402 
2403     res.waitClusterNoStart();
2404     res.startAll();
2405     if (res.waitClusterStarted())
2406       return NDBT_FAILED;
2407   }
2408 
2409   return NDBT_OK;
2410 }
2411 
2412 int
runScanUpdateUntilStopped(NDBT_Context * ctx,NDBT_Step * step)2413 runScanUpdateUntilStopped(NDBT_Context* ctx, NDBT_Step* step)
2414 {
2415   Ndb* pNdb = GETNDB(step);
2416   HugoTransactions hugoTrans(*ctx->getTab());
2417 
2418   NullOutputStream null;
2419   OutputStream * save[1];
2420   save[0] = g_err.m_out;
2421   g_err.m_out = &null;
2422   while (!ctx->isTestStopped())
2423   {
2424     hugoTrans.scanUpdateRecords(pNdb, 0);
2425   }
2426   g_err.m_out = save[0];
2427   return NDBT_OK;
2428 }
2429 
2430 int
runBug48436(NDBT_Context * ctx,NDBT_Step * step)2431 runBug48436(NDBT_Context* ctx, NDBT_Step* step)
2432 {
2433   NdbRestarter res;
2434   Uint32 loops = ctx->getNumLoops();
2435   const Uint32 nodeCount = res.getNumDbNodes();
2436   if(nodeCount < 2)
2437   {
2438     return NDBT_OK;
2439   }
2440 
2441   for (Uint32 l = 0; l<loops; l++)
2442   {
2443     int nodes[2];
2444     nodes[0] = res.getNode(NdbRestarter::NS_RANDOM);
2445     nodes[1] = res.getRandomNodeSameNodeGroup(nodes[0], rand());
2446     int val = 7099;
2447     int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2448 
2449     ndbout_c("nodes %u %u", nodes[0], nodes[1]);
2450 
2451     for (Uint32 j = 0; j<5; j++)
2452     {
2453       int c = (rand()) % 11;
2454       ndbout_c("case: %u", c);
2455       switch(c){
2456       case 0:
2457       case 1:
2458         res.dumpStateAllNodes(&val, 1);
2459       case 2:
2460       case 3:
2461       case 4:
2462       case 5:
2463         res.restartOneDbNode(nodes[0], false, true, true);
2464         res.waitNodesNoStart(nodes+0,1);
2465         res.dumpStateOneNode(nodes[0], val2, 2);
2466         res.insertErrorInNode(nodes[0], 5054); // crash during restart
2467         res.startAll();
2468         sleep(3);
2469         res.waitNodesNoStart(nodes+0,1);
2470         res.startAll();
2471         break;
2472       case 6:
2473         res.restartOneDbNode(nodes[0], false, true, true);
2474         res.waitNodesNoStart(nodes+0, 1);
2475         res.startAll();
2476         break;
2477       case 7:
2478         res.dumpStateAllNodes(&val, 1);
2479       case 8:
2480         res.restartOneDbNode(nodes[1], false, true, true);
2481         res.waitNodesNoStart(nodes+1,1);
2482         res.dumpStateOneNode(nodes[1], val2, 2);
2483         res.insertErrorInNode(nodes[1], 5054); // crash during restart
2484         res.startAll();
2485         sleep(3);
2486         res.waitNodesNoStart(nodes+1,1);
2487         res.startAll();
2488         break;
2489       case 9:
2490         res.restartAll(false, true, true);
2491         res.waitClusterNoStart();
2492         res.startAll();
2493         break;
2494       case 10:
2495       {
2496         res.dumpStateAllNodes(val2, 2);
2497         int node = res.getMasterNodeId();
2498         res.insertErrorInNode(node, 7222);
2499         res.waitClusterNoStart();
2500         res.startAll();
2501         break;
2502       }
2503       }
2504       res.waitClusterStarted();
2505     }
2506     res.restartAll(false, true, true);
2507     res.waitClusterNoStart();
2508     res.startAll();
2509     res.waitClusterStarted();
2510   }
2511   ctx->stopTest();
2512 
2513   return NDBT_OK;
2514 }
2515 
2516 int
runBug54611(NDBT_Context * ctx,NDBT_Step * step)2517 runBug54611(NDBT_Context* ctx, NDBT_Step* step)
2518 {
2519   NdbRestarter res;
2520   Uint32 loops = ctx->getNumLoops();
2521   Ndb* pNdb = GETNDB(step);
2522   int rows = ctx->getNumRecords();
2523 
2524   HugoTransactions hugoTrans(*ctx->getTab());
2525 
2526   for (Uint32 l = 0; l<loops; l++)
2527   {
2528     int val = DumpStateOrd::DihMinTimeBetweenLCP;
2529     res.dumpStateAllNodes(&val, 1);
2530 
2531     for (Uint32 i = 0; i < 5; i++)
2532     {
2533       hugoTrans.scanUpdateRecords(pNdb, rows);
2534     }
2535 
2536     int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2537     res.dumpStateAllNodes(val2, 2);
2538 
2539     int node = res.getMasterNodeId();
2540     res.insertErrorInNode(node, 7222);
2541 
2542     while (hugoTrans.scanUpdateRecords(pNdb, rows) == 0);
2543     res.waitClusterNoStart();
2544 
2545     res.insertErrorInAllNodes(5055);
2546     res.startAll();
2547     res.waitClusterStarted();
2548     pNdb->waitUntilReady();
2549   }
2550 
2551   return NDBT_OK;
2552 }
2553 
2554 int
runBug56961(NDBT_Context * ctx,NDBT_Step * step)2555 runBug56961(NDBT_Context* ctx, NDBT_Step* step)
2556 {
2557   NdbRestarter res;
2558   Uint32 loops = ctx->getNumLoops();
2559   Ndb* pNdb = GETNDB(step);
2560   int rows = ctx->getNumRecords();
2561 
2562   int node = res.getNode(NdbRestarter::NS_RANDOM);
2563   int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2564   HugoTransactions hugoTrans(*ctx->getTab());
2565 
2566   for (Uint32 l = 0; l<loops; l++)
2567   {
2568     ndbout_c("Waiting for %d to restart (5058)", node);
2569     res.dumpStateOneNode(node, val2, 2);
2570     res.insertErrorInNode(node, 5058);
2571 
2572     hugoTrans.clearTable(pNdb);
2573     hugoTrans.loadTable(pNdb, rows);
2574     while (hugoTrans.scanUpdateRecords(pNdb, rows) == NDBT_OK &&
2575            res.getNodeStatus(node) != NDB_MGM_NODE_STATUS_NOT_STARTED &&
2576            res.getNodeStatus(node) != NDB_MGM_NODE_STATUS_NO_CONTACT);
2577     res.waitNodesNoStart(&node, 1);
2578     res.startNodes(&node, 1);
2579     ndbout_c("Waiting for %d to start", node);
2580     res.waitClusterStarted();
2581 
2582     ndbout_c("Waiting for %d to restart (5059)", node);
2583     res.dumpStateOneNode(node, val2, 2);
2584     res.insertErrorInNode(node, 5059);
2585 
2586     hugoTrans.clearTable(pNdb);
2587     hugoTrans.loadTable(pNdb, rows);
2588     while (hugoTrans.scanUpdateRecords(pNdb, rows) == NDBT_OK &&
2589            res.getNodeStatus(node) != NDB_MGM_NODE_STATUS_NOT_STARTED &&
2590            res.getNodeStatus(node) != NDB_MGM_NODE_STATUS_NO_CONTACT);
2591     res.waitNodesNoStart(&node, 1);
2592     res.startNodes(&node, 1);
2593     ndbout_c("Waiting for %d to start", node);
2594     res.waitClusterStarted();
2595     pNdb->waitUntilReady();
2596   }
2597 
2598   return NDBT_OK;
2599 }
2600 
runAddNodes(NDBT_Context * ctx,NDBT_Step * step)2601 int runAddNodes(NDBT_Context* ctx, NDBT_Step* step)
2602 {
2603   /*
2604    To add new nodes online, the two nodes should be already up in the cluster,
2605    with nodegroup 65536. Then they can be added to the cluster online using the
2606    ndb_mgm command create nodegroup. Here,
2607    1. we retrieve the list of such nodes with ng 65536(internally -256) and
2608    2. add them to the cluster by passing them to the mgmapi function
2609       ndb_mgm_create_nodegroup().
2610    */
2611   NdbRestarter restarter;
2612 
2613   Vector<int> newNodes;
2614   int ng;
2615 
2616   /* Retrieve the list of nodes with nodegroup 65536(-256) */
2617   for(int i= 0; i < restarter.getNumDbNodes(); i++ )
2618   {
2619     int _node_id= restarter.getDbNodeId(i);
2620     if(restarter.getNodeGroup(_node_id) == -256)
2621     {
2622       /* nodes that don't have a nodegroup yet */
2623       newNodes.push_back(_node_id);
2624     }
2625   }
2626 
2627   /* if there are no new nodes, can't test add node restart */
2628   if(newNodes.size() == 0)
2629   {
2630     g_err << "ERR: "<< step->getName()
2631         << " failed on line " << __LINE__ << endl;
2632     g_err << "Incorrect cluster configuration."
2633         << "Requires additional nodes with nodegroup 65536." << endl;
2634     return NDBT_FAILED;
2635   }
2636 
2637   /* end of array value for newNodes */
2638   newNodes.push_back(0);
2639 
2640   /* include the new nodes into cluster using ndb_mgm_create_nodegroup() */
2641   if(ndb_mgm_create_nodegroup(restarter.handle, newNodes.getBase(),
2642                               &ng, NULL) != 0)
2643   {
2644     g_err << "ERR: "<< step->getName()
2645         << " failed on line " << __LINE__ << endl;
2646     g_err << ndb_mgm_get_latest_error_desc(restarter.handle) << endl;
2647     return NDBT_FAILED;
2648   }
2649   g_info << "New nodes added to nodegroup " << ng << endl;
2650 
2651   return NDBT_OK;
2652 }
2653 
runAlterTableAndOptimize(NDBT_Context * ctx,NDBT_Step * step)2654 int runAlterTableAndOptimize(NDBT_Context* ctx, NDBT_Step* step)
2655 {
2656   NdbRestarter restarter;
2657   /* check if there is a possibility of node killing during redistribution */
2658   bool nodesKilledDuringStep= ctx->getProperty("NodesKilledDuringStep");
2659 
2660   /* Redistribute existing cluster data */
2661   DbUtil sql("TEST_DB");
2662   {
2663     BaseString query;
2664     int numOfTables = ctx->getNumTables();
2665 
2666     /* ALTER ONLINE TABLE <tbl_name> REORGANIZE PARTITION */
2667     for(int i= 0; i < numOfTables; i++ )
2668     {
2669       SqlResultSet resultSet;
2670       query.assfmt("ALTER ONLINE TABLE %s REORGANIZE PARTITION",
2671                    ctx->getTableName(i));
2672       g_info << "Executing query : "<< query.c_str() << endl;
2673 
2674       if(!sql.doQuery(query.c_str(), resultSet)){
2675         if(nodesKilledDuringStep &&
2676            sql.getErrorNumber() == 0)
2677         {
2678           /* query failed probably because of a node kill in another step.
2679              wait for the nodes to get into start phase before retrying */
2680           if(restarter.waitClusterStarted() != 0){
2681             g_err << "Cluster went down during reorganize partition" << endl;
2682             return NDBT_FAILED;
2683           }
2684           /* retry the query for same table */
2685           i--;
2686           nodesKilledDuringStep= false;
2687           continue;
2688         } else {
2689           /* either the query failed due to returning error code from server
2690            or cluster crash */
2691           g_err << "QUERY : "<< query.c_str() << "; failed" << endl;
2692           return NDBT_FAILED;
2693         }
2694       }
2695     }
2696 
2697     if(nodesKilledDuringStep){
2698       /* Nodes were supposed to be killed during alter table,
2699          but they never were. Test lost its purpose. Mark it as failed
2700          Mostly won't happen. Just insuring. */
2701       g_err << "Nodes were never killed during alter table." << endl;
2702       return NDBT_FAILED;
2703     }
2704 
2705     /* Reclaim freed space by running optimize table */
2706     for(int i= 0; i < numOfTables; i++ )
2707     {
2708       SqlResultSet result;
2709       BaseString query;
2710       query.assfmt("OPTIMIZE TABLE %s", ctx->getTableName(i));
2711       g_info << "Executing query : "<< query.c_str() << endl;
2712       if (!sql.doQuery(query.c_str(), result)){
2713         g_err << "Failed executing optimize table" << endl;
2714         return NDBT_FAILED;
2715       }
2716     }
2717   }
2718   return NDBT_OK;
2719 }
2720 
runKillTwoNodes(NDBT_Context * ctx,NDBT_Step * step)2721 int runKillTwoNodes(NDBT_Context* ctx, NDBT_Step* step)
2722 {
2723   NdbRestarter restarter;
2724   int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2725   int kill[] = { 9999, 3000, 10000 };
2726   int result = NDBT_OK;
2727 
2728   Vector<int> nodes;
2729 
2730   /* choose first victim */
2731   nodes.push_back(restarter.getDbNodeId(rand() % restarter.getNumDbNodes()));
2732   /* select a node from different group as next victim */
2733   nodes.push_back(restarter.getRandomNodeOtherNodeGroup(nodes[0], rand()));
2734   for(int i = 0; i < 2; i++){
2735     g_info << "Killing node " << nodes[i] << "..." << endl;
2736     CHECK(restarter.dumpStateOneNode(nodes[i], val, 2) == 0);
2737     CHECK(restarter.dumpStateOneNode(nodes[i], kill, 3) == 0);
2738   }
2739 
2740   /* wait for both of them to come into no start */
2741   if(restarter.waitNodesNoStart(nodes.getBase(), 2) != 0)
2742   {
2743     g_err << "Nodes never restarted" << endl;
2744     return NDBT_FAILED;
2745   }
2746 
2747   /* start the killed nodes */
2748   if(restarter.startNodes(nodes.getBase(), 2) != 0)
2749   {
2750     g_err << "Unable to start killed node." << endl;
2751     return NDBT_FAILED;
2752   }
2753 
2754   /* wait for nodes to get started */
2755   if(restarter.waitNodesStarted(nodes.getBase(), nodes.size()) != 0)
2756   {
2757     g_err << "Killed nodes stuck in start phase." << endl;
2758     return NDBT_FAILED;
2759   }
2760 
2761   return result;
2762 }
2763 
runRestartOneNode(NDBT_Context * ctx,NDBT_Step * step)2764 int runRestartOneNode(NDBT_Context* ctx, NDBT_Step* step){
2765   Ndb* pNdb = GETNDB(step);
2766   int result = NDBT_OK;
2767   int timeout = 300;
2768   int records = ctx->getNumRecords();
2769   int count;
2770   NdbRestarter restarter;
2771   const int nodeCount = restarter.getNumDbNodes();
2772   if(nodeCount < 2){
2773     g_info << "RestartOneNode - Needs atleast 2 nodes to test" << endl;
2774     return NDBT_OK;
2775   }
2776   Vector<int> nodeIds;
2777   for(int i = 0; i<nodeCount; i++)
2778     nodeIds.push_back(restarter.getDbNodeId(i));
2779   Uint32 currentRestartNodeIndex = 0;
2780   HugoTransactions hugoTrans(*ctx->getTab());
2781   int cnt = nodeCount;
2782   /**
2783   1. Load data
2784   2. One by one restart all nodes with -nostart
2785   3. Verify records
2786   **/
2787 
2788   /*** 1 ***/
2789   g_info << "1- Loading Data " << endl;
2790   hugoTrans.loadTable(pNdb, records);
2791 
2792   while(cnt-- && result != NDBT_FAILED)
2793   {
2794     /*** 2 ***/
2795     g_info << "2- Restarting node : " << nodeIds[currentRestartNodeIndex]<< endl;
2796 
2797     CHECK(restarter.restartOneDbNode(nodeIds[currentRestartNodeIndex],
2798                                           false,//Initial
2799                                           true,//nostart
2800                                           false//abort
2801                                           ) == 0);
2802     CHECK(restarter.waitNodesNoStart(&nodeIds[currentRestartNodeIndex], 1, timeout) == 0);
2803     CHECK(restarter.startNodes(&nodeIds[currentRestartNodeIndex], 1) == 0);
2804     CHECK(restarter.waitNodesStarted(&nodeIds[currentRestartNodeIndex], 1, timeout) == 0);
2805     currentRestartNodeIndex = (currentRestartNodeIndex + 1 ) % nodeCount;
2806   }
2807 
2808   /*** 3 ***/
2809   ndbout << "3- Verifying records..." << endl;
2810   if(hugoTrans.selectCount(pNdb, 64, &count) )
2811     return NDBT_FAILED;
2812   if(hugoTrans.clearTable(pNdb))
2813     return NDBT_FAILED;
2814 
2815   /*** done ***/
2816   g_info << "runRestartOneNode finished" << endl;
2817   return result;
2818 }
2819 
runMixedModeRestart(NDBT_Context * ctx,NDBT_Step * step)2820 int runMixedModeRestart(NDBT_Context* ctx, NDBT_Step* step){
2821   int result = NDBT_OK;
2822   int timeout = 300;
2823   NdbRestarter restarter;
2824   const int nodeCount = restarter.getNumDbNodes();
2825   if(nodeCount < 4){
2826     g_info << "MixedModeRestart - Needs atleast 4 nodes to test" << endl;
2827     return NDBT_OK;
2828   }
2829   Vector<int> nodeIds;
2830   for(int i = 0; i<nodeCount; i++)
2831     nodeIds.push_back(restarter.getDbNodeId(i));
2832   int nodeToKill = nodeIds[0];
2833   int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2834   /**
2835   1. Killing two nodes of diffrent groups.
2836   2. Starting nodes with and without --initial option.
2837   **/
2838 
2839   /*** 1 ***/
2840   g_info << "1- Killing two nodes..." << endl;
2841   int otherNodeToKill = restarter.getRandomNodeOtherNodeGroup(nodeToKill,rand());
2842   if(otherNodeToKill == -1)
2843     return NDBT_FAILED;
2844 
2845   int kill[] = { 9999, 3000, 10000 };
2846 
2847   g_info <<"    Killing node : "<< nodeToKill << endl;
2848   if(restarter.dumpStateOneNode(nodeToKill, val, 2))
2849     return NDBT_FAILED;
2850   if(restarter.dumpStateOneNode(nodeToKill, kill, 3))
2851     return NDBT_FAILED;
2852 
2853   g_info <<"    Killing node : "<< otherNodeToKill << endl;
2854   if(restarter.dumpStateOneNode(otherNodeToKill, val, 2))
2855     return NDBT_FAILED;
2856   if(restarter.dumpStateOneNode(otherNodeToKill, kill, 3))
2857     return NDBT_FAILED;
2858 
2859   /*** 2 ***/
2860   g_info << "2 - Starting nodes with and without --initial option..." << endl;
2861 
2862   if(restarter.restartOneDbNode(nodeToKill,
2863                                 false,//Initial
2864                                 true,//nostart
2865                                 false//abort
2866                                 ))
2867     return NDBT_FAILED;
2868   if(restarter.waitNodesNoStart(&nodeToKill, 1, timeout))
2869     return NDBT_FAILED;
2870   if(restarter.startNodes(&nodeToKill, 1))
2871     return NDBT_FAILED;
2872   if(restarter.waitNodesStarted(&nodeToKill, 1, timeout))
2873     return NDBT_FAILED;
2874 
2875   if(restarter.restartOneDbNode(otherNodeToKill,
2876                                 true,//Initial
2877                                 true,//nostart
2878                                 false//abort
2879                                 ))
2880     return NDBT_FAILED;
2881   if(restarter.waitNodesNoStart(&otherNodeToKill, 1, timeout))
2882     return NDBT_FAILED;
2883   if(restarter.startNodes(&otherNodeToKill, 1))
2884     return NDBT_FAILED;
2885   if(restarter.waitNodesStarted(&otherNodeToKill, 1, timeout))
2886     return NDBT_FAILED;
2887 
2888   /*** done ***/
2889   g_info << "runMixedModeRestart finished" << endl;
2890   return result;
2891 }
2892 
runStartWithNodeGroupZero(NDBT_Context * ctx,NDBT_Step * step)2893 int runStartWithNodeGroupZero(NDBT_Context* ctx, NDBT_Step* step){
2894   int result = NDBT_OK;
2895   int timeout = 300;
2896   NdbRestarter restarter;
2897   const int nodeCount = restarter.getNumDbNodes();
2898   if(nodeCount < 4){
2899     g_info << "StartWithNodeGroupZero - Needs atleast 4 nodes to test" << endl;
2900     return NDBT_OK;
2901   }
2902   Vector<int> nodeIds;
2903   for(int i = 0; i<nodeCount; i++)
2904     nodeIds.push_back(restarter.getDbNodeId(i));
2905   int nodeId = nodeIds[0];
2906   int cnt = nodeCount;
2907   int nodeGroup = 0;
2908   while(cnt-- && nodeGroup == 0 && result != NDBT_FAILED)
2909   {
2910     /**
2911     1. Finding a node of group id other then 0.
2912     2. Restart that node
2913     3. Check the group id of the above node
2914     **/
2915     /*** 1 ***/
2916     g_info << "1- Findind a node of group id other then 0" << endl;
2917     nodeGroup = restarter.getNodeGroup(nodeId);
2918     g_info << "    Current node group : " << nodeGroup << endl;
2919     if(nodeGroup == 0)
2920     {
2921       g_info << "    Skiping this node" << endl;
2922       nodeId = restarter.getRandomNodeOtherNodeGroup(nodeId, 4);
2923       continue;
2924     }
2925 
2926     /*** 2 ***/
2927     g_info << "2- Restarting node : " << nodeId << " whose Group id is "
2928            << nodeGroup << endl;
2929 
2930     CHECK(restarter.restartOneDbNode(nodeId,
2931                                      true,//Initial
2932                                      true,//nostart
2933                                      false//abort
2934                                      ) == 0);
2935     CHECK(restarter.waitNodesNoStart(&nodeId, 1, timeout) == 0);
2936     CHECK(restarter.startNodes(&nodeId, 1) == 0);
2937     CHECK(restarter.waitNodesStarted(&nodeId, 1, timeout) == 0);
2938     nodeGroup = restarter.getNodeGroup(nodeId);
2939     /*** 3 ***/
2940     g_info << "3- Checking its group id" << endl;
2941     CHECK(nodeGroup !=0)
2942     g_info << "    current node group : " << nodeGroup << endl;
2943   }
2944 
2945   /*** done ***/
2946   g_info << "runStartWithNodeGroupZero finished" << endl;
2947 
2948   return result;
2949 }
2950 
runMixedModeRestart4Node(NDBT_Context * ctx,NDBT_Step * step)2951 int runMixedModeRestart4Node(NDBT_Context* ctx, NDBT_Step* step){
2952   int result = NDBT_OK;
2953   NdbRestarter restarter;
2954   const int nodeCount = restarter.getNumDbNodes();
2955   if(nodeCount < 8){
2956     g_info << "MixedModeRestart4Node - Needs atleast 8 nodes to test" << endl;
2957     return NDBT_OK;
2958   }
2959   Vector<int> nodeIds;
2960   for(int i = 0; i<nodeCount; i++)
2961     nodeIds.push_back(restarter.getDbNodeId(i));
2962   int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
2963   /**
2964   1. Killing four nodes of diffrent groups.
2965   2. Starting nodes with and without --initial option.
2966   **/
2967 
2968   /*** 1 ***/
2969   g_info << "1- Killing four nodes of diffrent groups." << endl;
2970   int nodesarray[256];
2971   int cnt = 0;
2972   int timeout = 300;
2973   Bitmask<4> seen_groups;
2974   for(int i = 0; i< nodeCount; i++)
2975   {
2976     int nodeGroup=restarter.getNodeGroup(nodeIds[i]);
2977     if (seen_groups.get(nodeGroup))
2978     {
2979       // One node in this node group already down
2980       g_info << "    Continuing as one node from this group is already killed."
2981              << " NodeGroup = " << nodeGroup << endl;
2982       continue;
2983     }
2984     seen_groups.set(nodeGroup);
2985     int kill[] = { 9999, 3000, 10000 };
2986     g_info <<"    Killing node : "<< nodeIds[i] << endl;
2987     CHECK(restarter.dumpStateOneNode(nodeIds[i], val, 2) == 0);
2988     CHECK(restarter.dumpStateOneNode(nodeIds[i], kill, 3) == 0);
2989     nodesarray[cnt++] = nodeIds[i];
2990   }
2991 
2992   /*** 2 ***/
2993   g_info << "2- Starting nodes with and without --initial option." << endl;
2994   bool flag = true;
2995   for(int i = 0; i < cnt; i++)
2996   {
2997     CHECK(restarter.restartOneDbNode(nodesarray[i],
2998                                      flag,//Initial
2999                                      true,//nostart
3000                                      false//abort
3001                                      ) == 0);
3002     CHECK(restarter.waitNodesNoStart(&nodesarray[i], 1, timeout) == 0);
3003     CHECK(restarter.startNodes(&nodesarray[i], 1) == 0);
3004     CHECK(restarter.waitNodesStarted(&nodesarray[i], 1, timeout) == 0);
3005     flag = false;
3006   }
3007 
3008    /*** done ***/
3009   g_info << "runMixedModeRestart4Node finished" << endl;
3010   return result;
3011 }
3012 
runKillMasterNodes(NDBT_Context * ctx,NDBT_Step * step)3013 int runKillMasterNodes(NDBT_Context* ctx, NDBT_Step* step){
3014   int result = NDBT_OK;
3015   NdbRestarter restarter;
3016   const int nodeCount = restarter.getNumDbNodes();
3017   if(nodeCount < 4){
3018     g_info << "KillMasterNodes - Needs atleast 4 nodes to test" << endl;
3019     return NDBT_OK;
3020   }
3021 
3022   Vector<int> nodeIds;
3023   for(int i = 0; i<nodeCount; i++)
3024     nodeIds.push_back(restarter.getDbNodeId(i));
3025   int val[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
3026   int kill[] = { 9999, 3000, 10000 };
3027   /**
3028   1. Killing only master node one by one.
3029   2. Start nodes without --initial option.
3030   **/
3031 
3032   /*** 1 ***/
3033   g_info << "1- Killing only master node one by one." << endl;
3034   int nodesarray[256];
3035   int timeout = 120;
3036   int cnt= 0;
3037   Bitmask<8> seen_groups;
3038   int master = restarter.getMasterNodeId();
3039   int newMaster;
3040   for(int i = 0; i< nodeCount; i++)
3041   {
3042     g_info << "Master Node Id : " << master << endl;
3043     int nodeGroup = restarter.getNodeGroup(master);
3044     CHECK(nodeGroup != -1);
3045     if (seen_groups.get(nodeGroup))
3046     {
3047       // One node in this node group already down
3048       g_info << "Breaking because master node belongs to the group whoes one"
3049       << "node is already down. Master = " << master << ", node Group = "
3050       << nodeGroup << endl;
3051       break;
3052     }
3053     seen_groups.set(nodeGroup);
3054     nodesarray[cnt++] = master;
3055     newMaster = restarter.getNextMasterNodeId(master);
3056     g_info <<"   killing node : "<< master << " group : " << nodeGroup << endl;
3057     CHECK(restarter.dumpStateOneNode(master, val, 2) == 0);
3058     CHECK(restarter.dumpStateOneNode(master, kill, 3) == 0);
3059     CHECK(restarter.waitNodesNoStart(&master, 1) == 0);
3060     master = newMaster;
3061   }
3062 
3063   /*** 2 ***/
3064   g_info << "2- Starting nodes without --initial option..." << endl;
3065   for(int i = 0; i<cnt; i++)
3066   {
3067     CHECK(restarter.startNodes(&nodesarray[i], 1) == 0);
3068     CHECK(restarter.waitNodesStarted(&nodesarray[i], 1, timeout) == 0);
3069   }
3070 
3071   /*** done ***/
3072   g_info << "runKillMasterNodes finished" << endl;
3073   return result;
3074 }
3075 
// Opens the definition of the testSystemRestart suite; the TESTCASE entries
// that follow, up to NDBT_TESTSUITE_END, belong to it.
NDBT_TESTSUITE(testSystemRestart);
// SR1: system restart from the REDO log. Registers runWaitStarted as
// initializer and runSystemRestart1 as the only step.
TESTCASE("SR1",
	 "Basic system restart test. Focus on testing restart from REDO log.\n"
	 "NOTE! Time between lcp's and gcp's should be left at default, \n"
	 "so that Ndb  uses the Redo log when restarting\n"
	 "1. Load records\n"
	 "2. Restart cluster and verify records \n"
	 "3. Update records\n"
	 "4. Restart cluster and verify records \n"
	 "5. Delete half of the records \n"
	 "6. Restart cluster and verify records \n"
	 "7. Delete all records \n"
	 "8. Restart cluster and verify records \n"
	 "9. Insert, update, delete records \n"
	 "10. Restart cluster and verify records\n"
	 "11. Insert, update, delete records \n"
	 "12. Restart cluster with error insert 5020 and verify records\n"){
  INITIALIZER(runWaitStarted);
  STEP(runSystemRestart1);
}
// SR2: system restart from a local checkpoint (LCP). Registers
// runWaitStarted as initializer and runSystemRestart2 as the only step.
TESTCASE("SR2",
	 "Basic system restart test. Focus on testing restart from LCP\n"
	 "NOTE! Time between lcp's is automatically set to it's  min value\n"
	 "so that Ndb  uses LCP's when restarting.\n"
	 "1. Load records\n"
	 "2. Restart cluster and verify records \n"
	 "3. Update records\n"
	 "4. Restart cluster and verify records \n"
	 "5. Delete half of the records \n"
	 "6. Restart cluster and verify records \n"
	 "7. Delete all records \n"
	 "8. Restart cluster and verify records \n"
	 "9. Insert, update, delete records \n"
	 "10. Restart cluster and verify records\n"){
  INITIALIZER(runWaitStarted);
  STEP(runSystemRestart2);
}
// SR_UNDO: system restart exercising undo logging. Registers runWaitStarted
// as initializer and runSystemRestartTestUndoLog as the only step.
TESTCASE("SR_UNDO",
	 "System restart test. Focus on testing of undologging\n"
	 "in DBACC and DBTUP.\n"
	 "This is done by starting a LCP, turn on undologging \n"
	 "but don't start writing the datapages. This will force all\n"
	 "operations to be written into the undolog.\n"
	 "Then write datapages and complete LCP.\n"
	 "Restart the system\n"){
  INITIALIZER(runWaitStarted);
  STEP(runSystemRestartTestUndoLog);
}
// SR_FULLDB: system restart with a full database. Registers runWaitStarted
// as initializer and runSystemRestartTestFullDb as the only step.
TESTCASE("SR_FULLDB",
	 "System restart test. Test to restart when DB is full.\n"){
  INITIALIZER(runWaitStarted);
  STEP(runSystemRestartTestFullDb);
}
// SR3: system restart where not all nodes were alive at shutdown. Registers
// runWaitStarted as initializer and runSystemRestart3 as the only step.
TESTCASE("SR3",
	 "System restart test. Focus on testing restart from with\n"
	 "not all nodes alive when system went down\n"
	 "* 1. Load data\n"
	 "* 2. Restart 1 node -nostart\n"
	 "* 3. Update records\n"
	 "* 4. Restart cluster and verify records\n"
	 "* 5. Restart 1 node -nostart\n"
	 "* 6. Delete half of the records\n"
	 "* 7. Restart cluster and verify records\n"
	 "* 8. Restart 1 node -nostart\n"
	 "* 9. Delete all records\n"
	 "* 10. Restart cluster and verify records\n"){
  INITIALIZER(runWaitStarted);
  STEP(runSystemRestart3);
}
// SR4: like SR3 but, per its description, with LCPs running at high speed.
// Registers runWaitStarted as initializer and runSystemRestart4 as the
// only step.
TESTCASE("SR4",
	 "System restart test. Focus on testing restart from with\n"
	 "not all nodes alive when system went down but running LCP at\n"
	 "high speed so that sometimes a TO is required to start cluster\n"
	 "* 1. Load data\n"
	 "* 2. Restart 1 node -nostart\n"
	 "* 3. Update records\n"
	 "* 4. Restart cluster and verify records\n"
	 "* 5. Restart 1 node -nostart\n"
	 "* 6. Delete half of the records\n"
	 "* 7. Restart cluster and verify records\n"
	 "* 8. Restart 1 node -nostart\n"
	 "* 9. Delete all records\n"
	 "* 10. Restart cluster and verify records\n"){
  INITIALIZER(runWaitStarted);
  STEP(runSystemRestart4);
}
// SR5: variant of SR4 that, per its description, uses aborting restarts.
// Registers runWaitStarted as initializer and runSystemRestart5 as the
// only step.
TESTCASE("SR5",
	 "As SR4 but making restart aborts\n"
	 "* 1. Load data\n"
	 "* 2. Restart 1 node -nostart\n"
	 "* 3. Update records\n"
	 "* 4. Restart cluster and verify records\n"
	 "* 5. Restart 1 node -nostart\n"
	 "* 6. Delete half of the records\n"
	 "* 7. Restart cluster and verify records\n"
	 "* 8. Restart 1 node -nostart\n"
	 "* 9. Delete all records\n"
	 "* 10. Restart cluster and verify records\n"){
  INITIALIZER(runWaitStarted);
  STEP(runSystemRestart5);
}
// SR6: system restart where only some nodes still have a file system.
// Registers runWaitStarted and runClearTable as initializers and
// runSystemRestart6 as the only step.
TESTCASE("SR6",
	 "Perform system restart with some nodes having FS others wo/\n"
	 "* 1. Load data\n"
	 "* 2. Restart all node -nostart\n"
	 "* 3. Restart some nodes -i -nostart\n"
	 "* 4. Start all nodes verify records\n"){
  INITIALIZER(runWaitStarted);
  INITIALIZER(runClearTable);
  STEP(runSystemRestart6);
}
// SR7: "partition win" system restart with one node started late.
// Registers runWaitStarted and runClearTable as initializers and
// runSystemRestart7 as the only step.
TESTCASE("SR7",
	 "Perform partition win system restart\n"
	 "* 1. Load data\n"
	 "* 2. Restart all node -nostart\n"
	 "* 3. Start all but one node\n"
	 "* 4. Verify records\n"
	 "* 5. Start last node\n"
	 "* 6. Verify records\n"){
  INITIALIZER(runWaitStarted);
  INITIALIZER(runClearTable);
  STEP(runSystemRestart7);
}
// SR8: "partition win" system restart where the last node is started only
// after start phase >= 2 (per the description). Registers runWaitStarted and
// runClearTable as initializers and runSystemRestart8 as the only step.
TESTCASE("SR8",
	 "Perform partition win system restart with other nodes delayed\n"
	 "* 1. Load data\n"
	 "* 2. Restart all node -nostart\n"
	 "* 3. Start all but one node\n"
	 "* 4. Wait for startphase >= 2\n"
	 "* 5. Start last node\n"
	 "* 6. Verify records\n"){
  INITIALIZER(runWaitStarted);
  INITIALIZER(runClearTable);
  STEP(runSystemRestart8);
}
// SR9: restart after a transaction that inserts and updates a row while an
// LCP is started before commit (per the listed steps). Registers
// runWaitStarted and runClearTable as initializers and runSystemRestart9 as
// the only step.
// NOTE(review): the first description line is identical to SR8's and does
// not match the listed steps - looks copy-pasted; confirm.
TESTCASE("SR9",
	 "Perform partition win system restart with other nodes delayed\n"
	 "* 1. Start transaction\n"
	 "* 2. insert (1,1)\n"
	 "* 3. update (1,2)\n"
	 "* 4. start lcp\n"
	 "* 5. commit\n"
	 "* 6. restart\n"){
  INITIALIZER(runWaitStarted);
  INITIALIZER(runClearTable);
  STEP(runSystemRestart9);
}
// SR10: further partitioned system restart tests. Registers runWaitStarted
// and runClearTable as initializers and runSystemRestart10 as the only step.
TESTCASE("SR10",
     "More tests of partitioned system restarts\n")
{
  INITIALIZER(runWaitStarted);
  INITIALIZER(runClearTable);
  STEP(runSystemRestart10);
}
// Bug18385: partitioned system restart where other nodes have a higher GCI.
// Registers runWaitStarted and runClearTable as initializers and
// runBug18385 as the only step.
TESTCASE("Bug18385",
	 "Perform partition system restart with other nodes with higher GCI"){
  INITIALIZER(runWaitStarted);
  INITIALIZER(runClearTable);
  STEP(runBug18385);
}
// Bug21536: carries the same description text as Bug18385 but runs
// runBug21536. Registers runWaitStarted and runClearTable as initializers.
TESTCASE("Bug21536",
	 "Perform partition system restart with other nodes with higher GCI"){
  INITIALIZER(runWaitStarted);
  INITIALIZER(runClearTable);
  STEP(runBug21536);
}
// Bug24664: LCP skip/keep handling. Registers runWaitStarted and
// runClearTable as initializers and runBug24664 as the only step.
TESTCASE("Bug24664",
	 "Check handling of LCP skip/keep")
{
  INITIALIZER(runWaitStarted);
  INITIALIZER(runClearTable);
  STEP(runBug24664);
}
// Bug27434: no description given. Registers runWaitStarted as initializer
// and runBug27434 as the only step.
TESTCASE("Bug27434",
	 "")
{
  INITIALIZER(runWaitStarted);
  STEP(runBug27434);
}
// SR_DD_1: sets test property ALL=1, registers runWaitStarted and
// clearOldBackups as initializers, and runStopper plus runSR_DD_1 as steps.
// NOTE(review): the consumers of the ALL property are not visible here.
TESTCASE("SR_DD_1", "")
{
  TC_PROPERTY("ALL", 1);
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runStopper);
  STEP(runSR_DD_1);
}
// SR_DD_1b: like SR_DD_1 but without the ALL property and without the
// runStopper step; runSR_DD_1 is the only step.
TESTCASE("SR_DD_1b", "")
{
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runSR_DD_1);
}
// SR_DD_1_LCP: SR_DD_1 with the additional test property LCP=1.
TESTCASE("SR_DD_1_LCP", "")
{
  TC_PROPERTY("ALL", 1);
  TC_PROPERTY("LCP", 1);
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runStopper);
  STEP(runSR_DD_1);
}
// SR_DD_1b_LCP: SR_DD_1b with the additional test property LCP=1.
TESTCASE("SR_DD_1b_LCP", "")
{
  TC_PROPERTY("LCP", 1);
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runSR_DD_1);
}
// SR_DD_2: sets test property ALL=1, registers runWaitStarted and
// clearOldBackups as initializers, and runStopper plus runSR_DD_2 as steps.
TESTCASE("SR_DD_2", "")
{
  TC_PROPERTY("ALL", 1);
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runStopper);
  STEP(runSR_DD_2);
}
// SR_DD_2b: like SR_DD_2 but without the ALL property and without the
// runStopper step; runSR_DD_2 is the only step.
TESTCASE("SR_DD_2b", "")
{
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runSR_DD_2);
}
// SR_DD_2_LCP: SR_DD_2 with the additional test property LCP=1.
TESTCASE("SR_DD_2_LCP", "")
{
  TC_PROPERTY("ALL", 1);
  TC_PROPERTY("LCP", 1);
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runStopper);
  STEP(runSR_DD_2);
}
// SR_DD_2b_LCP: SR_DD_2b with the additional test property LCP=1.
TESTCASE("SR_DD_2b_LCP", "")
{
  TC_PROPERTY("LCP", 1);
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runSR_DD_2);
}
// SR_DD_3: sets test property ALL=1, registers runWaitStarted and
// clearOldBackups as initializers, and runStopper plus runSR_DD_3 as steps.
TESTCASE("SR_DD_3", "")
{
  TC_PROPERTY("ALL", 1);
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runStopper);
  STEP(runSR_DD_3);
}
// SR_DD_3b: like SR_DD_3 but without the ALL property and without the
// runStopper step; runSR_DD_3 is the only step.
TESTCASE("SR_DD_3b", "")
{
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runSR_DD_3);
}
// SR_DD_3_LCP: SR_DD_3 with the additional test property LCP=1.
TESTCASE("SR_DD_3_LCP", "")
{
  TC_PROPERTY("ALL", 1);
  TC_PROPERTY("LCP", 1);
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runStopper);
  STEP(runSR_DD_3);
}
// SR_DD_3b_LCP: SR_DD_3b with the additional test property LCP=1.
TESTCASE("SR_DD_3b_LCP", "")
{
  TC_PROPERTY("LCP", 1);
  INITIALIZER(runWaitStarted);
  INITIALIZER(clearOldBackups);
  STEP(runSR_DD_3);
}
// Bug29167: no description given. Registers runWaitStarted as initializer
// and runBug29167 as the only step.
TESTCASE("Bug29167", "")
{
  INITIALIZER(runWaitStarted);
  STEP(runBug29167);
}
// Bug28770: cluster start with error insert 6100 on all nodes (per the
// description). Registers runWaitStarted and runClearTable as initializers
// and runBug28770 as the only step.
TESTCASE("Bug28770",
         "Check readTableFile1 fails, readTableFile2 succeeds\n"
         "1. Restart all node -nostart\n"
         "2. Insert error 6100 into all nodes\n"
         "3. Start all nodes\n"
         "4. Ensure cluster start\n"
         "5. Read and verify reocrds\n"
         "6. Repeat until looping is completed\n"){
  INITIALIZER(runWaitStarted);
  INITIALIZER(runClearTable);
  STEP(runBug28770);
}
// Bug22696: no description given. Everything, including runBug22696 itself,
// is registered as an initializer; there is no STEP.
TESTCASE("Bug22696", "")
{
  INITIALIZER(runWaitStarted);
  INITIALIZER(runLoadTable);
  INITIALIZER(runBug22696);
}
// to: take-over during system restart. runWaitStarted, runLoadTable and
// runTO are all registered as initializers; there is no STEP.
TESTCASE("to", "Take-over during SR")
{
  INITIALIZER(runWaitStarted);
  INITIALIZER(runLoadTable);
  INITIALIZER(runTO);
}
// basic: no description given. Registers runWaitStarted and
// runCreateAllTables as initializers, runBasic as the only step and
// runDropAllTables as finalizer.
TESTCASE("basic", "")
{
  INITIALIZER(runWaitStarted);
  INITIALIZER(runCreateAllTables);
  STEP(runBasic);
  FINALIZER(runDropAllTables);
}
// Bug41915: reuses the SR_DD_2 steps (runStopper + runSR_DD_2) with test
// properties ALL=1, ERROR=5053 and ROWS=30. Unlike SR_DD_2 it does not
// register clearOldBackups.
TESTCASE("Bug41915", "")
{
  TC_PROPERTY("ALL", 1);
  TC_PROPERTY("ERROR", 5053);
  TC_PROPERTY("ROWS", 30);
  INITIALIZER(runWaitStarted);
  STEP(runStopper);
  STEP(runSR_DD_2);
}
// Bug45154: no description given. runBug45154 is registered as the sole
// initializer; there is no STEP.
TESTCASE("Bug45154", "")
{
  INITIALIZER(runBug45154);
}
// Bug46651: no description given. runBug46651 is registered as the sole
// initializer; there is no STEP.
TESTCASE("Bug46651", "")
{
  INITIALIZER(runBug46651);
}
// Bug46412: no description given. runBug46412 is registered as the sole
// initializer; there is no STEP.
TESTCASE("Bug46412", "")
{
  INITIALIZER(runBug46412);
}
// Bug48436: no description given. Registers runLoadTable as initializer and
// two steps, runBug48436 and runScanUpdateUntilStopped.
// NOTE(review): presumably the two steps run concurrently - confirm against
// the NDBT framework.
TESTCASE("Bug48436", "")
{
  INITIALIZER(runLoadTable);
  STEP(runBug48436);
  STEP(runScanUpdateUntilStopped);
}
// Bug54611: no description given. runLoadTable and runBug54611 are both
// registered as initializers; there is no STEP.
TESTCASE("Bug54611", "")
{
  INITIALIZER(runLoadTable);
  INITIALIZER(runBug54611);
}
// Bug56961: no description given. runLoadTable and runBug56961 are both
// registered as initializers; there is no STEP.
TESTCASE("Bug56961", "")
{
  INITIALIZER(runLoadTable);
  INITIALIZER(runBug56961);
}
3418 TESTCASE("MTR_AddNodesAndRestart1",
3419          "1. Insert few rows to table"
3420          "2. Add nodes to the cluster"
3421          "3. Reorganize partition and optimize table"
3422          "Should be run only once")
3423 {
3424   ALL_TABLES();
3425   INITIALIZER(runWaitStarted);
3426   INITIALIZER(runFillTable);
3427   INITIALIZER(runAddNodes);
3428   STEP(runAlterTableAndOptimize);
3429   VERIFIER(runVerifyFilledTables);
3430 }
3431 TESTCASE("MTR_AddNodesAndRestart2",
3432          "1. Fill the table fully"
3433          "2. Add nodes to the cluster"
3434          "3. Reorganize partition and optimize table"
3435          "4. Kill 2 nodes during reorganization"
3436          "Should be run only once")
3437 {
3438   ALL_TABLES();
3439   TC_PROPERTY("NodesKilledDuringStep", true);
3440   INITIALIZER(runWaitStarted);
3441   INITIALIZER(runFillTable);
3442   INITIALIZER(runAddNodes);
3443   STEP(runAlterTableAndOptimize);
3444   STEP(runKillTwoNodes);
3445   VERIFIER(runVerifyFilledTables);
3446 }
// RestartOneNode: registers runWaitStarted as initializer and
// runRestartOneNode as the only step.
// NOTE(review): the description says "Restart 1 node", while
// runRestartOneNode restarts every data node in turn.
TESTCASE("RestartOneNode",
	 "Perform one nodes restart\n"
	 "* 1. Load data\n"
	 "* 2. Restart 1 node\n"
	 "* 3. Verify records\n"){
  INITIALIZER(runWaitStarted);
  STEP(runRestartOneNode);
}
// MixedModeRestart: kill two nodes of different groups and restart them with
// and without --initial. Registers runWaitStarted as initializer and
// runMixedModeRestart as the only step.
TESTCASE("MixedModeRestart",
         "Perform kiiling of two node and starting them\n"
         "* 1. Killing two nodes of diffrent groups\n"
         "* 2. Starting nodes with and without --initial option\n"){
  INITIALIZER(runWaitStarted);
  STEP(runMixedModeRestart);
}
// StartWithNodeGroupZero: checks that a restarted node from a non-zero node
// group does not come back in group 0. Registers runWaitStarted as
// initializer and runStartWithNodeGroupZero as the only step.
TESTCASE("StartWithNodeGroupZero",
         "check that a node doesn't always attached to group 0 while restart\n"
         "* 1. Finding a node of group id other then 0\n"
         "* 2. Restart that node\n"
         "* 3. Check the group id of the above node\n"){
  INITIALIZER(runWaitStarted);
  STEP(runStartWithNodeGroupZero);
}
// MixedModeRestart4Node: kill one node per node group and restart them with
// and without --initial. Registers runWaitStarted as initializer and
// runMixedModeRestart4Node as the only step.
TESTCASE("MixedModeRestart4Node",
         "Perform killing of four nodes and starting them\n"
         "* 1. Killing four nodes of diffrent groups\n"
         "* 2. Starting nodes with and without --initial option\n"){
  INITIALIZER(runWaitStarted);
  STEP(runMixedModeRestart4Node);
}
// KillMasterNodes: kill successive master nodes and start them again without
// --initial. Registers runWaitStarted as initializer and runKillMasterNodes
// as the only step.
TESTCASE("KillMasterNodes",
	 "perform Killing of master node and then starting them\n"
	 "* 1. Killing only the master nodes one by one\n"
         "* 2. Start without --initial option\n"){
  INITIALIZER(runWaitStarted);
  STEP(runKillMasterNodes);
}
// Closes the testSystemRestart suite definition opened by NDBT_TESTSUITE.
NDBT_TESTSUITE_END(testSystemRestart);
3485 
main(int argc,const char ** argv)3486 int main(int argc, const char** argv){
3487   ndb_init();
3488   NDBT_TESTSUITE_INSTANCE(testSystemRestart);
3489   return testSystemRestart.execute(argc, argv);
3490 }
3491