1 /*
2 Copyright (c) 2004, 2014, Oracle and/or its affiliates. All rights reserved.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License, version 2.0,
6 as published by the Free Software Foundation.
7
8 This program is also distributed with certain software (including
9 but not limited to OpenSSL) that is licensed under separate terms,
10 as designated in a particular file or component or in included license
11 documentation. The authors of MySQL hereby grant you an additional
12 permission to link the program and your derivative works with the
13 separately licensed software that they have included with MySQL.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License, version 2.0, for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 #include <ndb_global.h>
26 #include <ndb_opts.h>
27 #include <time.h>
28
29 #include <mgmapi.h>
30 #include <NdbMain.h>
31 #include <NdbOut.hpp>
32 #include <NdbSleep.h>
33 #include <NdbTick.h>
34 #include <portlib/ndb_localtime.h>
35
36 #include <NDBT.hpp>
37
38 #include <kernel/NodeBitmask.hpp>
39
/*
 * Forward declaration: main() calls waitClusterStatus() before its
 * definition further down in this file.
 */
static int
waitClusterStatus(const char* _addr, ndb_mgm_node_status _status);
42
/* Command line state; each variable is bound to an entry in my_long_options. */
static int _no_contact = 0;              // --no-contact
static int _not_started = 0;             // --not-started
static int _single_user = 0;             // --single-user
static int _timeout = 120; // Seconds
static const char* _wait_nodes = 0;      // --wait-nodes, raw argument text
static const char* _nowait_nodes = 0;    // --nowait-nodes, raw argument text
/*
 * Node ids that getStatus() leaves out of ndbNodes. main() fills it from
 * --nowait-nodes, or from --wait-nodes followed by bitNOT().
 */
static NdbNodeBitmask nowait_nodes_bitmask;
50
/* Zero-terminated group list handed to ndb_load_defaults() and ndb_usage(). */
const char *load_default_groups[]= { "mysql_cluster",0 };
52
/*
 * Option table passed to handle_options() and ndb_usage(). It starts with
 * the entries produced by NDB_STD_OPTS and ends with an all-zero terminator.
 */
static struct my_option my_long_options[] =
{
  NDB_STD_OPTS("ndb_waiter"),
  /* Flag stored in _no_contact; main() then waits for NO_CONTACT. */
  { "no-contact", 'n', "Wait for cluster no contact",
    (uchar**) &_no_contact, (uchar**) &_no_contact, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
  /* Flag stored in _not_started; main() then waits for NOT_STARTED. */
  { "not-started", NDB_OPT_NOSHORT, "Wait for cluster not started",
    (uchar**) &_not_started, (uchar**) &_not_started, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
  /* Flag stored in _single_user; main() then waits for SINGLEUSER. */
  { "single-user", NDB_OPT_NOSHORT,
    "Wait for cluster to enter single user mode",
    (uchar**) &_single_user, (uchar**) &_single_user, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
  /* Integer stored in _timeout; 120 matches the variable's initializer. */
  { "timeout", 't', "Timeout to wait in seconds",
    (uchar**) &_timeout, (uchar**) &_timeout, 0,
    GET_INT, REQUIRED_ARG, 120, 0, 0, 0, 0, 0 },
  /* String stored in _wait_nodes; parsed by parse_mask() in main(). */
  { "wait-nodes", 'w', "Node ids to wait on, e.g. '1,2-4'",
    (uchar**) &_wait_nodes, (uchar**) &_wait_nodes, 0,
    GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
  /* String stored in _nowait_nodes; parsed by parse_mask() in main(). */
  { "nowait-nodes", NDB_OPT_NOSHORT,
    "Nodes that will not be waited for, e.g. '2,3,4-7'",
    (uchar**) &_nowait_nodes, (uchar**) &_nowait_nodes, 0,
    GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
  /* Terminator entry. */
  { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
78
/*
 * Short usage callback registered through ndb_opt_set_usage_funcs() and
 * passed on to ndb_usage(); it forwards to ndb_short_usage_sub() with a
 * NULL argument.
 */
static void short_usage_sub(void)
{
  ndb_short_usage_sub(NULL);
}
83
/*
 * Full usage callback registered through ndb_opt_set_usage_funcs(); it
 * hands the short usage callback, the defaults groups and this tool's
 * option table to ndb_usage().
 */
static void usage()
{
  ndb_usage(short_usage_sub, load_default_groups, my_long_options);
}
88
/*
 * Signal handler that main() installs for SIGUSR1 on non-Windows builds.
 * The body is intentionally empty: the handler exists only so that the
 * signal can be delivered to the process while it runs (see the EINTR
 * testing note in main()).
 */
extern "C"
void catch_signal(int signum)
{
}
93
94 #include "../src/common/util/parse_mask.hpp"
95
main(int argc,char ** argv)96 int main(int argc, char** argv){
97 NDB_INIT(argv[0]);
98 ndb_opt_set_usage_funcs(short_usage_sub, usage);
99 ndb_load_defaults(NULL,load_default_groups,&argc,&argv);
100
101 #ifndef DBUG_OFF
102 opt_debug= "d:t:O,/tmp/ndb_waiter.trace";
103 #endif
104
105 #ifndef _WIN32
106 // Catching signal to allow testing of EINTR safeness
107 // with "while killall -USR1 ndbwaiter; do true; done"
108 signal(SIGUSR1, catch_signal);
109 #endif
110
111 if (handle_options(&argc, &argv, my_long_options,
112 ndb_std_get_one_option))
113 return NDBT_ProgramExit(NDBT_WRONGARGS);
114
115 const char* connect_string = argv[0];
116 if (connect_string == 0)
117 connect_string = opt_ndb_connectstring;
118
119 enum ndb_mgm_node_status wait_status;
120 if (_no_contact)
121 {
122 wait_status= NDB_MGM_NODE_STATUS_NO_CONTACT;
123 }
124 else if (_not_started)
125 {
126 wait_status= NDB_MGM_NODE_STATUS_NOT_STARTED;
127 }
128 else if (_single_user)
129 {
130 wait_status= NDB_MGM_NODE_STATUS_SINGLEUSER;
131 }
132 else
133 {
134 wait_status= NDB_MGM_NODE_STATUS_STARTED;
135 }
136
137 if (_nowait_nodes)
138 {
139 int res = parse_mask(_nowait_nodes, nowait_nodes_bitmask);
140 if(res == -2 || (res > 0 && nowait_nodes_bitmask.get(0)))
141 {
142 ndbout_c("Invalid nodeid specified in nowait-nodes: %s",
143 _nowait_nodes);
144 exit(-1);
145 }
146 else if (res < 0)
147 {
148 ndbout_c("Unable to parse nowait-nodes argument: %s",
149 _nowait_nodes);
150 exit(-1);
151 }
152 }
153
154 if (_wait_nodes)
155 {
156 if (_nowait_nodes)
157 {
158 ndbout_c("Can not set both wait-nodes and nowait-nodes.");
159 exit(-1);
160 }
161
162 int res = parse_mask(_wait_nodes, nowait_nodes_bitmask);
163 if (res == -2 || (res > 0 && nowait_nodes_bitmask.get(0)))
164 {
165 ndbout_c("Invalid nodeid specified in wait-nodes: %s",
166 _wait_nodes);
167 exit(-1);
168 }
169 else if (res < 0)
170 {
171 ndbout_c("Unable to parse wait-nodes argument: %s",
172 _wait_nodes);
173 exit(-1);
174 }
175
176 // Don't wait for any other nodes than the ones we have set explicitly
177 nowait_nodes_bitmask.bitNOT();
178 }
179
180 if (waitClusterStatus(connect_string, wait_status) != 0)
181 return NDBT_ProgramExit(NDBT_FAILED);
182 return NDBT_ProgramExit(NDBT_OK);
183 }
184
/*
 * Print the values of ndb_mgm_get_latest_error(h) and
 * ndb_mgm_get_latest_error_line(h) for management handle h to ndbout.
 *
 * The body is wrapped in do { } while (0) and carries no trailing
 * semicolon, so "MGMERR(h);" expands to exactly one statement and remains
 * safe inside an unbraced if/else.
 */
#define MGMERR(h) \
  do { \
    ndbout << "latest_error="<<ndb_mgm_get_latest_error((h)) \
           << ", line="<<ndb_mgm_get_latest_error_line((h)) \
           << endl; \
  } while (0)
189
/*
 * Management server handle. It is created and connected in
 * waitClusterStatus() and used (and reconnected on failure) by getStatus().
 */
NdbMgmHandle handle= NULL;

/*
 * Data node states collected by the latest getStatus() call, excluding
 * node ids set in nowait_nodes_bitmask.
 */
Vector<ndb_mgm_node_state> ndbNodes;
193
194 int
getStatus()195 getStatus(){
196 int retries = 0;
197 struct ndb_mgm_cluster_state * status;
198 struct ndb_mgm_node_state * node;
199
200 ndbNodes.clear();
201
202 while(retries < 10){
203 status = ndb_mgm_get_status(handle);
204 if (status == NULL){
205 ndbout << "status==NULL, retries="<<retries<<endl;
206 MGMERR(handle);
207 retries++;
208 ndb_mgm_disconnect(handle);
209 if (ndb_mgm_connect(handle,0,0,1)) {
210 MGMERR(handle);
211 g_err << "Reconnect failed" << endl;
212 break;
213 }
214 continue;
215 }
216 int count = status->no_of_nodes;
217 for (int i = 0; i < count; i++){
218 node = &status->node_states[i];
219 switch(node->node_type){
220 case NDB_MGM_NODE_TYPE_NDB:
221 if (!nowait_nodes_bitmask.get(node->node_id))
222 ndbNodes.push_back(*node);
223 break;
224 case NDB_MGM_NODE_TYPE_MGM:
225 /* Don't care about MGM nodes */
226 break;
227 case NDB_MGM_NODE_TYPE_API:
228 /* Don't care about API nodes */
229 break;
230 default:
231 if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
232 node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
233 retries++;
234 ndbNodes.clear();
235 free(status);
236 status = NULL;
237 count = 0;
238
239 ndbout << "kalle"<< endl;
240 break;
241 }
242 abort();
243 break;
244 }
245 }
246 if(status == 0){
247 ndbout << "status == 0" << endl;
248 continue;
249 }
250 free(status);
251 return 0;
252 }
253
254 return -1;
255 }
256
257 static
258 char*
getTimeAsString(char * pStr,size_t len)259 getTimeAsString(char* pStr, size_t len)
260 {
261 // Get current time
262 time_t now;
263 time(&now);
264
265 // Convert to local timezone
266 tm tm_buf;
267 ndb_localtime_r(&now, &tm_buf);
268
269 // Print to string buffer
270 BaseString::snprintf(pStr, len,
271 "%02d:%02d:%02d",
272 tm_buf.tm_hour,
273 tm_buf.tm_min,
274 tm_buf.tm_sec);
275 return pStr;
276 }
277
278 static int
waitClusterStatus(const char * _addr,ndb_mgm_node_status _status)279 waitClusterStatus(const char* _addr,
280 ndb_mgm_node_status _status)
281 {
282 int _startphase = -1;
283
284 #ifndef NDB_WIN
285 /* Ignore SIGPIPE */
286 signal(SIGPIPE, SIG_IGN);
287 #endif
288
289 handle = ndb_mgm_create_handle();
290 if (handle == NULL){
291 g_err << "Could not create ndb_mgm handle" << endl;
292 return -1;
293 }
294 g_info << "Connecting to mgmsrv at " << _addr << endl;
295 if (ndb_mgm_set_connectstring(handle, _addr))
296 {
297 MGMERR(handle);
298 g_err << "Connectstring " << _addr << " invalid" << endl;
299 return -1;
300 }
301 if (ndb_mgm_connect(handle,0,0,1)) {
302 MGMERR(handle);
303 g_err << "Connection to " << _addr << " failed" << endl;
304 return -1;
305 }
306
307 int attempts = 0;
308 int resetAttempts = 0;
309 const int MAX_RESET_ATTEMPTS = 10;
310 bool allInState = false;
311
312 NDB_TICKS start = NdbTick_getCurrentTicks();
313 NDB_TICKS now = start;
314
315 while (allInState == false){
316 if (_timeout > 0 &&
317 NdbTick_Elapsed(start,now).seconds() > (Uint64)_timeout){
318 /**
319 * Timeout has expired waiting for the nodes to enter
320 * the state we want
321 */
322 bool waitMore = false;
323 /**
324 * Make special check if we are waiting for
325 * cluster to become started
326 */
327 if(_status == NDB_MGM_NODE_STATUS_STARTED)
328 {
329 waitMore = true;
330 /**
331 * First check if any node is not starting
332 * then it's no idea to wait anymore
333 */
334 for (unsigned n = 0; n < ndbNodes.size(); n++)
335 {
336 if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
337 ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
338 {
339 waitMore = false;
340 break;
341 }
342 }
343 }
344
345 if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
346 g_err << "waitNodeState("
347 << ndb_mgm_get_node_status_string(_status)
348 <<", "<<_startphase<<")"
349 << " timeout after " << attempts << " attempts" << endl;
350 return -1;
351 }
352
353 g_err << "waitNodeState("
354 << ndb_mgm_get_node_status_string(_status)
355 <<", "<<_startphase<<")"
356 << " resetting timeout "
357 << resetAttempts << endl;
358
359 start = now;
360
361 resetAttempts++;
362 }
363
364 if (attempts > 0)
365 NdbSleep_MilliSleep(100);
366 if (getStatus() != 0){
367 return -1;
368 }
369
370 /* Assume all nodes are in state(if there is any) */
371 allInState = (ndbNodes.size() > 0);
372
373 /* Loop through all nodes and check their state */
374 for (unsigned n = 0; n < ndbNodes.size(); n++) {
375 ndb_mgm_node_state* ndbNode = &ndbNodes[n];
376
377 require(ndbNode != NULL);
378
379 g_info << "Node " << ndbNode->node_id << ": "
380 << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;
381
382 if (ndbNode->node_status != _status)
383 allInState = false;
384 }
385
386 if (!allInState) {
387 char timestamp[9];
388 g_info << "[" << getTimeAsString(timestamp, sizeof(timestamp)) << "] "
389 << "Waiting for cluster enter state "
390 << ndb_mgm_get_node_status_string(_status) << endl;
391 }
392
393 attempts++;
394
395 now = NdbTick_getCurrentTicks();
396 }
397 return 0;
398 }
399
/* Explicit instantiation of Vector for the element type used by ndbNodes. */
template class Vector<ndb_mgm_node_state>;
401