1 /*
2    Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #include "NdbMixRestarter.hpp"
26 
NdbMixRestarter(unsigned * _seed,const char * _addr)27 NdbMixRestarter::NdbMixRestarter(unsigned * _seed, const char* _addr) :
28   NdbRestarter(_addr),
29   m_mask(~(Uint32)0)
30 {
31   if (_seed == 0)
32   {
33     ownseed = (unsigned)NdbTick_CurrentMillisecond();
34     seed = &ownseed;
35   }
36   else
37   {
38     seed = _seed;
39   }
40 }
41 
~NdbMixRestarter()42 NdbMixRestarter::~NdbMixRestarter()
43 {
44 
45 }
46 
/*
 * CHECK(b): if b evaluates to false, print the step name and the source
 * line of the expansion, set `result` to NDBT_FAILED and `continue` the
 * enclosing loop.
 *
 * Requirements at the expansion site: a pointer `step` with getName(), an
 * assignable `result`, and an enclosing loop. Inside do { } while(0) the
 * `continue` jumps to the (false) loop condition and so leaves the block.
 * Because of that `continue`, the macro cannot itself be wrapped in
 * do { } while(0).
 */
#define CHECK(b) \
  if (!(b)) \
  { \
    ndbout << "ERR: " << step->getName() \
           << " failed on line " << __LINE__ << endl; \
    result = NDBT_FAILED; \
    continue; \
  }
52 
53 int
restart_cluster(NDBT_Context * ctx,NDBT_Step * step,bool stopabort)54 NdbMixRestarter::restart_cluster(NDBT_Context* ctx,
55                                  NDBT_Step* step,
56                                  bool stopabort)
57 {
58   int timeout = 180;
59   int result = NDBT_OK;
60 
61   do
62   {
63     ctx->setProperty(NMR_SR_THREADS_STOPPED, (Uint32)0);
64     ctx->setProperty(NMR_SR_VALIDATE_THREADS_DONE, (Uint32)0);
65 
66     ndbout << " -- Shutting down " << endl;
67     ctx->setProperty(NMR_SR, NdbMixRestarter::SR_STOPPING);
68     CHECK(restartAll(false, true, stopabort) == 0);
69     ctx->setProperty(NMR_SR, NdbMixRestarter::SR_STOPPED);
70     CHECK(waitClusterNoStart(timeout) == 0);
71 
72     Uint32 cnt = ctx->getProperty(NMR_SR_THREADS);
73     Uint32 curr= ctx->getProperty(NMR_SR_THREADS_STOPPED);
74     while(curr != cnt && !ctx->isTestStopped())
75     {
76       if (curr > cnt)
77       {
78         ndbout_c("stopping: curr: %d cnt: %d", curr, cnt);
79         abort();
80       }
81 
82       NdbSleep_MilliSleep(100);
83       curr= ctx->getProperty(NMR_SR_THREADS_STOPPED);
84     }
85 
86     CHECK(ctx->isTestStopped() == false);
87     CHECK(startAll() == 0);
88     CHECK(waitClusterStarted(timeout) == 0);
89 
90     cnt = ctx->getProperty(NMR_SR_VALIDATE_THREADS);
91     if (cnt)
92     {
93       ndbout << " -- Validating starts " << endl;
94       ctx->setProperty(NMR_SR_VALIDATE_THREADS_DONE, (Uint32)0);
95       ctx->setProperty(NMR_SR, NdbMixRestarter::SR_VALIDATING);
96       curr = ctx->getProperty(NMR_SR_VALIDATE_THREADS_DONE);
97       while (curr != cnt && !ctx->isTestStopped())
98       {
99         if (curr > cnt)
100         {
101           ndbout_c("validating: curr: %d cnt: %d", curr, cnt);
102           abort();
103         }
104 
105         NdbSleep_MilliSleep(100);
106         curr = ctx->getProperty(NMR_SR_VALIDATE_THREADS_DONE);
107       }
108       ndbout << " -- Validating complete " << endl;
109     }
110     CHECK(ctx->isTestStopped() == false);
111     ctx->setProperty(NMR_SR, NdbMixRestarter::SR_RUNNING);
112 
113   } while(0);
114 
115   return result;
116 }
117 
118 static
119 void
select_nodes_to_stop(Vector<ndb_mgm_node_state * > & victims,Vector<ndb_mgm_node_state> & nodes)120 select_nodes_to_stop(Vector<ndb_mgm_node_state*>& victims,
121                      Vector<ndb_mgm_node_state>& nodes)
122 {
123   Uint32 i, j;
124   Vector<ndb_mgm_node_state*> alive_nodes;
125   for(i = 0; i<nodes.size(); i++)
126   {
127     ndb_mgm_node_state* node = &nodes[i];
128     if (node->node_status == NDB_MGM_NODE_STATUS_STARTED)
129       alive_nodes.push_back(node);
130   }
131 
132   // Remove those with one in node group
133   for(i = 0; i<alive_nodes.size(); i++)
134   {
135     int group = alive_nodes[i]->node_group;
136     for(j = 0; j<alive_nodes.size(); j++)
137     {
138       if (i != j && alive_nodes[j]->node_group == group)
139       {
140 	victims.push_back(alive_nodes[i]);
141 	break;
142       }
143     }
144   }
145 }
146 
147 static
148 ndb_mgm_node_state*
select_node_to_stop(unsigned * seed,Vector<ndb_mgm_node_state> & nodes)149 select_node_to_stop(unsigned * seed, Vector<ndb_mgm_node_state>& nodes)
150 {
151   Vector<ndb_mgm_node_state*> victims;
152   select_nodes_to_stop(victims, nodes);
153 
154   if (victims.size())
155   {
156     int victim = ndb_rand_r(seed) % victims.size();
157     return victims[victim];
158   }
159   else
160   {
161     return 0;
162   }
163 }
164 
165 static
166 void
select_nodes_to_start(Vector<ndb_mgm_node_state * > & victims,Vector<ndb_mgm_node_state> & nodes)167 select_nodes_to_start(Vector<ndb_mgm_node_state*>& victims,
168                       Vector<ndb_mgm_node_state>& nodes)
169 {
170   Uint32 i;
171   for(i = 0; i<nodes.size(); i++)
172   {
173     ndb_mgm_node_state* node = &nodes[i];
174     if (node->node_status == NDB_MGM_NODE_STATUS_NOT_STARTED)
175       victims.push_back(node);
176   }
177 }
178 
179 static
180 ndb_mgm_node_state*
select_node_to_start(unsigned * seed,Vector<ndb_mgm_node_state> & nodes)181 select_node_to_start(unsigned * seed,
182                      Vector<ndb_mgm_node_state>& nodes)
183 {
184   Vector<ndb_mgm_node_state*> victims;
185   select_nodes_to_start(victims, nodes);
186 
187   if (victims.size())
188   {
189     int victim = ndb_rand_r(seed) % victims.size();
190     return victims[victim];
191   }
192   else
193   {
194     return 0;
195   }
196 }
197 
198 void
setRestartTypeMask(Uint32 mask)199 NdbMixRestarter::setRestartTypeMask(Uint32 mask)
200 {
201   m_mask = mask;
202 }
203 
204 int
runUntilStopped(NDBT_Context * ctx,NDBT_Step * step,Uint32 freq)205 NdbMixRestarter::runUntilStopped(NDBT_Context* ctx,
206                                  NDBT_Step* step,
207                                  Uint32 freq)
208 {
209   if (init(ctx, step))
210     return NDBT_FAILED;
211 
212   while (!ctx->isTestStopped())
213   {
214     if (dostep(ctx, step))
215       return NDBT_FAILED;
216     NdbSleep_SecSleep(freq);
217   }
218 
219   if (!finish(ctx, step))
220     return NDBT_FAILED;
221 
222   return NDBT_OK;
223 }
224 
225 int
runPeriod(NDBT_Context * ctx,NDBT_Step * step,Uint32 period,Uint32 freq)226 NdbMixRestarter::runPeriod(NDBT_Context* ctx,
227                            NDBT_Step* step,
228                            Uint32 period, Uint32 freq)
229 {
230   if (init(ctx, step))
231     return NDBT_FAILED;
232 
233   Uint32 stop = (Uint32)time(0) + period;
234   while (!ctx->isTestStopped() && (time(0) < stop))
235   {
236     if (dostep(ctx, step))
237     {
238       return NDBT_FAILED;
239     }
240     NdbSleep_SecSleep(freq);
241   }
242 
243   if (finish(ctx, step))
244   {
245     return NDBT_FAILED;
246   }
247 
248   ctx->stopTest();
249   return NDBT_OK;
250 }
251 
252 int
init(NDBT_Context * ctx,NDBT_Step * step)253 NdbMixRestarter::init(NDBT_Context* ctx, NDBT_Step* step)
254 {
255   waitClusterStarted();
256   m_nodes = ndbNodes;
257   return 0;
258 }
259 
260 int
dostep(NDBT_Context * ctx,NDBT_Step * step)261 NdbMixRestarter::dostep(NDBT_Context* ctx, NDBT_Step* step)
262 {
263   ndb_mgm_node_state* node = 0;
264   int action;
265 loop:
266   while(((action = (1 << (ndb_rand_r(seed) % RTM_COUNT))) & m_mask) == 0);
267   switch(action){
268   case RTM_RestartCluster:
269     if (restart_cluster(ctx, step))
270       return NDBT_FAILED;
271     for (Uint32 i = 0; i<m_nodes.size(); i++)
272       m_nodes[i].node_status = NDB_MGM_NODE_STATUS_STARTED;
273     break;
274   case RTM_RestartNode:
275   case RTM_RestartNodeInitial:
276   case RTM_StopNode:
277   case RTM_StopNodeInitial:
278   {
279     if ((node = select_node_to_stop(seed, m_nodes)) == 0)
280       goto loop;
281 
282     if (action == RTM_RestartNode || action == RTM_RestartNodeInitial)
283       ndbout << "Restarting " << node->node_id;
284     else
285       ndbout << "Stopping " << node->node_id;
286 
287     bool initial =
288       action == RTM_RestartNodeInitial || action == RTM_StopNodeInitial;
289 
290     if (initial)
291       ndbout << " inital";
292     ndbout << endl;
293 
294     if (restartOneDbNode(node->node_id, initial, true, true))
295       return NDBT_FAILED;
296 
297     if (waitNodesNoStart(&node->node_id, 1))
298       return NDBT_FAILED;
299 
300     node->node_status = NDB_MGM_NODE_STATUS_NOT_STARTED;
301 
302     if (action == RTM_StopNode || action == RTM_StopNodeInitial)
303       break;
304     else
305       goto start;
306   }
307   case RTM_StartNode:
308     if ((node = select_node_to_start(seed, m_nodes)) == 0)
309       goto loop;
310 start:
311     ndbout << "Starting " << node->node_id << endl;
312     if (startNodes(&node->node_id, 1))
313       return NDBT_FAILED;
314     if (waitNodesStarted(&node->node_id, 1))
315       return NDBT_FAILED;
316 
317     node->node_status = NDB_MGM_NODE_STATUS_STARTED;
318     break;
319   }
320   return NDBT_OK;
321 }
322 
323 int
finish(NDBT_Context * ctx,NDBT_Step * step)324 NdbMixRestarter::finish(NDBT_Context* ctx, NDBT_Step* step)
325 {
326   Vector<int> not_started;
327   {
328     ndb_mgm_node_state* node = 0;
329     while((node = select_node_to_start(seed, m_nodes)))
330     {
331       not_started.push_back(node->node_id);
332       node->node_status = NDB_MGM_NODE_STATUS_STARTED;
333     }
334   }
335 
336   if (not_started.size())
337   {
338     ndbout << "Starting stopped nodes " << endl;
339     if (startNodes(not_started.getBase(), not_started.size()))
340       return NDBT_FAILED;
341     if (waitClusterStarted())
342       return NDBT_FAILED;
343   }
344   return NDBT_OK;
345 }
346 
// Explicit instantiation of Vector for the node-state pointer type used by
// the select_nodes_to_stop()/select_nodes_to_start() helpers in this file.
template class Vector<ndb_mgm_node_state*>;
348