1 /*
2 Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include "NdbMixRestarter.hpp"
26
NdbMixRestarter(unsigned * _seed,const char * _addr)27 NdbMixRestarter::NdbMixRestarter(unsigned * _seed, const char* _addr) :
28 NdbRestarter(_addr),
29 m_mask(~(Uint32)0)
30 {
31 if (_seed == 0)
32 {
33 ownseed = (unsigned)NdbTick_CurrentMillisecond();
34 seed = &ownseed;
35 }
36 else
37 {
38 seed = _seed;
39 }
40 }
41
~NdbMixRestarter()42 NdbMixRestarter::~NdbMixRestarter()
43 {
44
45 }
46
/*
 * CHECK(b): if condition b is false, print an error naming the current step
 * and source line, set `result` to NDBT_FAILED and execute `continue`.
 *
 * The expansion refers to `step` and `result`, so both must be in scope at
 * the point of use, and it must appear inside a loop. When used directly in
 * a do { ... } while(0) body, the `continue` leaves that body.
 */
#define CHECK(b) if (!(b)) { \
  ndbout << "ERR: "<< step->getName() \
         << " failed on line " << __LINE__ << endl; \
  result = NDBT_FAILED; \
  continue; }
52
53 int
restart_cluster(NDBT_Context * ctx,NDBT_Step * step,bool stopabort)54 NdbMixRestarter::restart_cluster(NDBT_Context* ctx,
55 NDBT_Step* step,
56 bool stopabort)
57 {
58 int timeout = 180;
59 int result = NDBT_OK;
60
61 do
62 {
63 ctx->setProperty(NMR_SR_THREADS_STOPPED, (Uint32)0);
64 ctx->setProperty(NMR_SR_VALIDATE_THREADS_DONE, (Uint32)0);
65
66 ndbout << " -- Shutting down " << endl;
67 ctx->setProperty(NMR_SR, NdbMixRestarter::SR_STOPPING);
68 CHECK(restartAll(false, true, stopabort) == 0);
69 ctx->setProperty(NMR_SR, NdbMixRestarter::SR_STOPPED);
70 CHECK(waitClusterNoStart(timeout) == 0);
71
72 Uint32 cnt = ctx->getProperty(NMR_SR_THREADS);
73 Uint32 curr= ctx->getProperty(NMR_SR_THREADS_STOPPED);
74 while(curr != cnt && !ctx->isTestStopped())
75 {
76 if (curr > cnt)
77 {
78 ndbout_c("stopping: curr: %d cnt: %d", curr, cnt);
79 abort();
80 }
81
82 NdbSleep_MilliSleep(100);
83 curr= ctx->getProperty(NMR_SR_THREADS_STOPPED);
84 }
85
86 CHECK(ctx->isTestStopped() == false);
87 CHECK(startAll() == 0);
88 CHECK(waitClusterStarted(timeout) == 0);
89
90 cnt = ctx->getProperty(NMR_SR_VALIDATE_THREADS);
91 if (cnt)
92 {
93 ndbout << " -- Validating starts " << endl;
94 ctx->setProperty(NMR_SR_VALIDATE_THREADS_DONE, (Uint32)0);
95 ctx->setProperty(NMR_SR, NdbMixRestarter::SR_VALIDATING);
96 curr = ctx->getProperty(NMR_SR_VALIDATE_THREADS_DONE);
97 while (curr != cnt && !ctx->isTestStopped())
98 {
99 if (curr > cnt)
100 {
101 ndbout_c("validating: curr: %d cnt: %d", curr, cnt);
102 abort();
103 }
104
105 NdbSleep_MilliSleep(100);
106 curr = ctx->getProperty(NMR_SR_VALIDATE_THREADS_DONE);
107 }
108 ndbout << " -- Validating complete " << endl;
109 }
110 CHECK(ctx->isTestStopped() == false);
111 ctx->setProperty(NMR_SR, NdbMixRestarter::SR_RUNNING);
112
113 } while(0);
114
115 return result;
116 }
117
118 static
119 void
select_nodes_to_stop(Vector<ndb_mgm_node_state * > & victims,Vector<ndb_mgm_node_state> & nodes)120 select_nodes_to_stop(Vector<ndb_mgm_node_state*>& victims,
121 Vector<ndb_mgm_node_state>& nodes)
122 {
123 Uint32 i, j;
124 Vector<ndb_mgm_node_state*> alive_nodes;
125 for(i = 0; i<nodes.size(); i++)
126 {
127 ndb_mgm_node_state* node = &nodes[i];
128 if (node->node_status == NDB_MGM_NODE_STATUS_STARTED)
129 alive_nodes.push_back(node);
130 }
131
132 // Remove those with one in node group
133 for(i = 0; i<alive_nodes.size(); i++)
134 {
135 int group = alive_nodes[i]->node_group;
136 for(j = 0; j<alive_nodes.size(); j++)
137 {
138 if (i != j && alive_nodes[j]->node_group == group)
139 {
140 victims.push_back(alive_nodes[i]);
141 break;
142 }
143 }
144 }
145 }
146
147 static
148 ndb_mgm_node_state*
select_node_to_stop(unsigned * seed,Vector<ndb_mgm_node_state> & nodes)149 select_node_to_stop(unsigned * seed, Vector<ndb_mgm_node_state>& nodes)
150 {
151 Vector<ndb_mgm_node_state*> victims;
152 select_nodes_to_stop(victims, nodes);
153
154 if (victims.size())
155 {
156 int victim = ndb_rand_r(seed) % victims.size();
157 return victims[victim];
158 }
159 else
160 {
161 return 0;
162 }
163 }
164
165 static
166 void
select_nodes_to_start(Vector<ndb_mgm_node_state * > & victims,Vector<ndb_mgm_node_state> & nodes)167 select_nodes_to_start(Vector<ndb_mgm_node_state*>& victims,
168 Vector<ndb_mgm_node_state>& nodes)
169 {
170 Uint32 i;
171 for(i = 0; i<nodes.size(); i++)
172 {
173 ndb_mgm_node_state* node = &nodes[i];
174 if (node->node_status == NDB_MGM_NODE_STATUS_NOT_STARTED)
175 victims.push_back(node);
176 }
177 }
178
179 static
180 ndb_mgm_node_state*
select_node_to_start(unsigned * seed,Vector<ndb_mgm_node_state> & nodes)181 select_node_to_start(unsigned * seed,
182 Vector<ndb_mgm_node_state>& nodes)
183 {
184 Vector<ndb_mgm_node_state*> victims;
185 select_nodes_to_start(victims, nodes);
186
187 if (victims.size())
188 {
189 int victim = ndb_rand_r(seed) % victims.size();
190 return victims[victim];
191 }
192 else
193 {
194 return 0;
195 }
196 }
197
198 void
setRestartTypeMask(Uint32 mask)199 NdbMixRestarter::setRestartTypeMask(Uint32 mask)
200 {
201 m_mask = mask;
202 }
203
204 int
runUntilStopped(NDBT_Context * ctx,NDBT_Step * step,Uint32 freq)205 NdbMixRestarter::runUntilStopped(NDBT_Context* ctx,
206 NDBT_Step* step,
207 Uint32 freq)
208 {
209 if (init(ctx, step))
210 return NDBT_FAILED;
211
212 while (!ctx->isTestStopped())
213 {
214 if (dostep(ctx, step))
215 return NDBT_FAILED;
216 NdbSleep_SecSleep(freq);
217 }
218
219 if (!finish(ctx, step))
220 return NDBT_FAILED;
221
222 return NDBT_OK;
223 }
224
225 int
runPeriod(NDBT_Context * ctx,NDBT_Step * step,Uint32 period,Uint32 freq)226 NdbMixRestarter::runPeriod(NDBT_Context* ctx,
227 NDBT_Step* step,
228 Uint32 period, Uint32 freq)
229 {
230 if (init(ctx, step))
231 return NDBT_FAILED;
232
233 Uint32 stop = (Uint32)time(0) + period;
234 while (!ctx->isTestStopped() && (time(0) < stop))
235 {
236 if (dostep(ctx, step))
237 {
238 return NDBT_FAILED;
239 }
240 NdbSleep_SecSleep(freq);
241 }
242
243 if (finish(ctx, step))
244 {
245 return NDBT_FAILED;
246 }
247
248 ctx->stopTest();
249 return NDBT_OK;
250 }
251
252 int
init(NDBT_Context * ctx,NDBT_Step * step)253 NdbMixRestarter::init(NDBT_Context* ctx, NDBT_Step* step)
254 {
255 waitClusterStarted();
256 m_nodes = ndbNodes;
257 return 0;
258 }
259
260 int
dostep(NDBT_Context * ctx,NDBT_Step * step)261 NdbMixRestarter::dostep(NDBT_Context* ctx, NDBT_Step* step)
262 {
263 ndb_mgm_node_state* node = 0;
264 int action;
265 loop:
266 while(((action = (1 << (ndb_rand_r(seed) % RTM_COUNT))) & m_mask) == 0);
267 switch(action){
268 case RTM_RestartCluster:
269 if (restart_cluster(ctx, step))
270 return NDBT_FAILED;
271 for (Uint32 i = 0; i<m_nodes.size(); i++)
272 m_nodes[i].node_status = NDB_MGM_NODE_STATUS_STARTED;
273 break;
274 case RTM_RestartNode:
275 case RTM_RestartNodeInitial:
276 case RTM_StopNode:
277 case RTM_StopNodeInitial:
278 {
279 if ((node = select_node_to_stop(seed, m_nodes)) == 0)
280 goto loop;
281
282 if (action == RTM_RestartNode || action == RTM_RestartNodeInitial)
283 ndbout << "Restarting " << node->node_id;
284 else
285 ndbout << "Stopping " << node->node_id;
286
287 bool initial =
288 action == RTM_RestartNodeInitial || action == RTM_StopNodeInitial;
289
290 if (initial)
291 ndbout << " inital";
292 ndbout << endl;
293
294 if (restartOneDbNode(node->node_id, initial, true, true))
295 return NDBT_FAILED;
296
297 if (waitNodesNoStart(&node->node_id, 1))
298 return NDBT_FAILED;
299
300 node->node_status = NDB_MGM_NODE_STATUS_NOT_STARTED;
301
302 if (action == RTM_StopNode || action == RTM_StopNodeInitial)
303 break;
304 else
305 goto start;
306 }
307 case RTM_StartNode:
308 if ((node = select_node_to_start(seed, m_nodes)) == 0)
309 goto loop;
310 start:
311 ndbout << "Starting " << node->node_id << endl;
312 if (startNodes(&node->node_id, 1))
313 return NDBT_FAILED;
314 if (waitNodesStarted(&node->node_id, 1))
315 return NDBT_FAILED;
316
317 node->node_status = NDB_MGM_NODE_STATUS_STARTED;
318 break;
319 }
320 return NDBT_OK;
321 }
322
323 int
finish(NDBT_Context * ctx,NDBT_Step * step)324 NdbMixRestarter::finish(NDBT_Context* ctx, NDBT_Step* step)
325 {
326 Vector<int> not_started;
327 {
328 ndb_mgm_node_state* node = 0;
329 while((node = select_node_to_start(seed, m_nodes)))
330 {
331 not_started.push_back(node->node_id);
332 node->node_status = NDB_MGM_NODE_STATUS_STARTED;
333 }
334 }
335
336 if (not_started.size())
337 {
338 ndbout << "Starting stopped nodes " << endl;
339 if (startNodes(not_started.getBase(), not_started.size()))
340 return NDBT_FAILED;
341 if (waitClusterStarted())
342 return NDBT_FAILED;
343 }
344 return NDBT_OK;
345 }
346
// Explicit instantiation of Vector for the node-state pointer element type
// used by the node selection helpers in this file.
template class Vector<ndb_mgm_node_state*>;
348