1 /*
2 Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <time.h>
26 #include <ndb_global.h>
27 #include <ndb_opts.h>
28
29 #include <mgmapi.h>
30 #include <NdbMain.h>
31 #include <NdbOut.hpp>
32 #include <NdbSleep.h>
33 #include <NdbTick.h>
34
35 #include <NDBT.hpp>
36
37 #include <kernel/NodeBitmask.hpp>
38
39 static int
40 waitClusterStatus(const char* _addr, ndb_mgm_node_status _status);
41
42 static int _no_contact = 0;
43 static int _not_started = 0;
44 static int _single_user = 0;
45 static int _timeout = 120;
46 static const char* _wait_nodes = 0;
47 static const char* _nowait_nodes = 0;
48 static NdbNodeBitmask nowait_nodes_bitmask;
49
50 const char *load_default_groups[]= { "mysql_cluster",0 };
51
/*
 * Command line options understood by ndb_waiter, in addition to the
 * standard NDB options pulled in through NDB_STD_OPTS.
 *
 * Every entry stores its value in one of the static option variables
 * declared above (the same address is passed twice per entry).
 * NOTE(review): the meaning of the remaining positional fields is
 * defined by struct my_option, which is not visible here.
 */
static struct my_option my_long_options[] =
{
  NDB_STD_OPTS("ndb_waiter"),
  /* Boolean flags selecting which node status to wait for */
  { "no-contact", 'n', "Wait for cluster no contact",
    (uchar**) &_no_contact, (uchar**) &_no_contact, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
  { "not-started", NDB_OPT_NOSHORT, "Wait for cluster not started",
    (uchar**) &_not_started, (uchar**) &_not_started, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
  { "single-user", NDB_OPT_NOSHORT,
    "Wait for cluster to enter single user mode",
    (uchar**) &_single_user, (uchar**) &_single_user, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
  /* Wait timeout in seconds; the entry repeats the 120 default of _timeout */
  { "timeout", 't', "Timeout to wait in seconds",
    (uchar**) &_timeout, (uchar**) &_timeout, 0,
    GET_INT, REQUIRED_ARG, 120, 0, 0, 0, 0, 0 },
  /* Node selection; main() rejects using both lists together */
  { "wait-nodes", 'w', "Node ids to wait on, e.g. '1,2-4'",
    (uchar**) &_wait_nodes, (uchar**) &_wait_nodes, 0,
    GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
  { "nowait-nodes", NDB_OPT_NOSHORT,
    "Nodes that will not be waited for, e.g. '2,3,4-7'",
    (uchar**) &_nowait_nodes, (uchar**) &_nowait_nodes, 0,
    GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
  /* Terminating all-zero entry */
  { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
77
short_usage_sub(void)78 static void short_usage_sub(void)
79 {
80 ndb_short_usage_sub(NULL);
81 }
82
usage()83 static void usage()
84 {
85 ndb_usage(short_usage_sub, load_default_groups, my_long_options);
86 }
87
/*
 * Signal handler that deliberately does nothing. main() installs it
 * for SIGUSR1 so that the signal is delivered to a handler; see the
 * EINTR note at the installation site.
 */
extern "C"
void catch_signal(int signum)
{
  (void) signum; /* intentionally unused */
}
92
93 #include "../src/common/util/parse_mask.hpp"
94
main(int argc,char ** argv)95 int main(int argc, char** argv){
96 NDB_INIT(argv[0]);
97 ndb_opt_set_usage_funcs(short_usage_sub, usage);
98 load_defaults("my",load_default_groups,&argc,&argv);
99
100 #ifndef DBUG_OFF
101 opt_debug= "d:t:O,/tmp/ndb_waiter.trace";
102 #endif
103
104 #ifndef _WIN32
105 // Catching signal to allow testing of EINTR safeness
106 // with "while killall -USR1 ndbwaiter; do true; done"
107 signal(SIGUSR1, catch_signal);
108 #endif
109
110 if (handle_options(&argc, &argv, my_long_options,
111 ndb_std_get_one_option))
112 return NDBT_ProgramExit(NDBT_WRONGARGS);
113
114 const char* connect_string = argv[0];
115 if (connect_string == 0)
116 connect_string = opt_ndb_connectstring;
117
118 enum ndb_mgm_node_status wait_status;
119 if (_no_contact)
120 {
121 wait_status= NDB_MGM_NODE_STATUS_NO_CONTACT;
122 }
123 else if (_not_started)
124 {
125 wait_status= NDB_MGM_NODE_STATUS_NOT_STARTED;
126 }
127 else if (_single_user)
128 {
129 wait_status= NDB_MGM_NODE_STATUS_SINGLEUSER;
130 }
131 else
132 {
133 wait_status= NDB_MGM_NODE_STATUS_STARTED;
134 }
135
136 if (_nowait_nodes)
137 {
138 int res = parse_mask(_nowait_nodes, nowait_nodes_bitmask);
139 if(res == -2 || (res > 0 && nowait_nodes_bitmask.get(0)))
140 {
141 ndbout_c("Invalid nodeid specified in nowait-nodes: %s",
142 _nowait_nodes);
143 exit(-1);
144 }
145 else if (res < 0)
146 {
147 ndbout_c("Unable to parse nowait-nodes argument: %s",
148 _nowait_nodes);
149 exit(-1);
150 }
151 }
152
153 if (_wait_nodes)
154 {
155 if (_nowait_nodes)
156 {
157 ndbout_c("Can not set both wait-nodes and nowait-nodes.");
158 exit(-1);
159 }
160
161 int res = parse_mask(_wait_nodes, nowait_nodes_bitmask);
162 if (res == -2 || (res > 0 && nowait_nodes_bitmask.get(0)))
163 {
164 ndbout_c("Invalid nodeid specified in wait-nodes: %s",
165 _wait_nodes);
166 exit(-1);
167 }
168 else if (res < 0)
169 {
170 ndbout_c("Unable to parse wait-nodes argument: %s",
171 _wait_nodes);
172 exit(-1);
173 }
174
175 // Don't wait for any other nodes than the ones we have set explicitly
176 nowait_nodes_bitmask.bitNOT();
177 }
178
179 if (waitClusterStatus(connect_string, wait_status) != 0)
180 return NDBT_ProgramExit(NDBT_FAILED);
181 return NDBT_ProgramExit(NDBT_OK);
182 }
183
/*
 * Print the latest error code and error line recorded on a management
 * handle to ndbout. The expansion already ends with ';'.
 */
#define MGMERR(mgm_handle) \
  ndbout << "latest_error=" << ndb_mgm_get_latest_error((mgm_handle)) \
         << ", line=" << ndb_mgm_get_latest_error_line((mgm_handle)) \
         << endl;
188
/* Management server handle; created in waitClusterStatus() and used by getStatus(). */
NdbMgmHandle handle= NULL;

/* States of the data nodes collected by the most recent getStatus() call. */
Vector<ndb_mgm_node_state> ndbNodes;
192
193 int
getStatus()194 getStatus(){
195 int retries = 0;
196 struct ndb_mgm_cluster_state * status;
197 struct ndb_mgm_node_state * node;
198
199 ndbNodes.clear();
200
201 while(retries < 10){
202 status = ndb_mgm_get_status(handle);
203 if (status == NULL){
204 ndbout << "status==NULL, retries="<<retries<<endl;
205 MGMERR(handle);
206 retries++;
207 ndb_mgm_disconnect(handle);
208 if (ndb_mgm_connect(handle,0,0,1)) {
209 MGMERR(handle);
210 g_err << "Reconnect failed" << endl;
211 break;
212 }
213 continue;
214 }
215 int count = status->no_of_nodes;
216 for (int i = 0; i < count; i++){
217 node = &status->node_states[i];
218 switch(node->node_type){
219 case NDB_MGM_NODE_TYPE_NDB:
220 if (!nowait_nodes_bitmask.get(node->node_id))
221 ndbNodes.push_back(*node);
222 break;
223 case NDB_MGM_NODE_TYPE_MGM:
224 /* Don't care about MGM nodes */
225 break;
226 case NDB_MGM_NODE_TYPE_API:
227 /* Don't care about API nodes */
228 break;
229 default:
230 if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
231 node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
232 retries++;
233 ndbNodes.clear();
234 free(status);
235 status = NULL;
236 count = 0;
237
238 ndbout << "kalle"<< endl;
239 break;
240 }
241 abort();
242 break;
243 }
244 }
245 if(status == 0){
246 ndbout << "status == 0" << endl;
247 continue;
248 }
249 free(status);
250 return 0;
251 }
252
253 return -1;
254 }
255
256 char*
getTimeAsString(char * pStr)257 getTimeAsString(char* pStr)
258 {
259 time_t now;
260 now= ::time((time_t*)NULL);
261
262 struct tm* tm_now;
263 #ifdef NDB_WIN32
264 tm_now = localtime(&now);
265 #else
266 tm_now = ::localtime(&now); //uses the "current" timezone
267 #endif
268
269 BaseString::snprintf(pStr, 9,
270 "%02d:%02d:%02d",
271 tm_now->tm_hour,
272 tm_now->tm_min,
273 tm_now->tm_sec);
274
275 return pStr;
276 }
277
278 static int
waitClusterStatus(const char * _addr,ndb_mgm_node_status _status)279 waitClusterStatus(const char* _addr,
280 ndb_mgm_node_status _status)
281 {
282 int _startphase = -1;
283
284 #ifndef NDB_WIN
285 /* Ignore SIGPIPE */
286 signal(SIGPIPE, SIG_IGN);
287 #endif
288
289 handle = ndb_mgm_create_handle();
290 if (handle == NULL){
291 g_err << "Could not create ndb_mgm handle" << endl;
292 return -1;
293 }
294 g_info << "Connecting to mgmsrv at " << _addr << endl;
295 if (ndb_mgm_set_connectstring(handle, _addr))
296 {
297 MGMERR(handle);
298 g_err << "Connectstring " << _addr << " invalid" << endl;
299 return -1;
300 }
301 if (ndb_mgm_connect(handle,0,0,1)) {
302 MGMERR(handle);
303 g_err << "Connection to " << _addr << " failed" << endl;
304 return -1;
305 }
306
307 int attempts = 0;
308 int resetAttempts = 0;
309 const int MAX_RESET_ATTEMPTS = 10;
310 bool allInState = false;
311
312 Uint64 time_now = NdbTick_CurrentMillisecond();
313 Uint64 timeout_time = time_now + 1000 * _timeout;
314
315 while (allInState == false){
316 if (_timeout > 0 && time_now > timeout_time){
317 /**
318 * Timeout has expired waiting for the nodes to enter
319 * the state we want
320 */
321 bool waitMore = false;
322 /**
323 * Make special check if we are waiting for
324 * cluster to become started
325 */
326 if(_status == NDB_MGM_NODE_STATUS_STARTED){
327 waitMore = true;
328 /**
329 * First check if any node is not starting
330 * then it's no idea to wait anymore
331 */
332 for (size_t n = 0; n < ndbNodes.size(); n++){
333 if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
334 ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
335 waitMore = false;
336
337 }
338 }
339
340 if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
341 g_err << "waitNodeState("
342 << ndb_mgm_get_node_status_string(_status)
343 <<", "<<_startphase<<")"
344 << " timeout after " << attempts << " attempts" << endl;
345 return -1;
346 }
347
348 g_err << "waitNodeState("
349 << ndb_mgm_get_node_status_string(_status)
350 <<", "<<_startphase<<")"
351 << " resetting timeout "
352 << resetAttempts << endl;
353
354 timeout_time = time_now + 1000 * _timeout;
355
356 resetAttempts++;
357 }
358
359 if (attempts > 0)
360 NdbSleep_MilliSleep(100);
361 if (getStatus() != 0){
362 return -1;
363 }
364
365 /* Assume all nodes are in state(if there is any) */
366 allInState = (ndbNodes.size() > 0);
367
368 /* Loop through all nodes and check their state */
369 for (size_t n = 0; n < ndbNodes.size(); n++) {
370 ndb_mgm_node_state* ndbNode = &ndbNodes[n];
371
372 assert(ndbNode != NULL);
373
374 g_info << "Node " << ndbNode->node_id << ": "
375 << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;
376
377 if (ndbNode->node_status != _status)
378 allInState = false;
379 }
380
381 if (!allInState) {
382 char time[9];
383 g_info << "[" << getTimeAsString(time) << "] "
384 << "Waiting for cluster enter state "
385 << ndb_mgm_get_node_status_string(_status) << endl;
386 }
387
388 attempts++;
389
390 time_now = NdbTick_CurrentMillisecond();
391 }
392 return 0;
393 }
394
/* Explicit instantiation of the Vector specialization used by ndbNodes. */
template class Vector<ndb_mgm_node_state>;
396