1 /*
2    Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #include <time.h>
26 #include <ndb_global.h>
27 #include <ndb_opts.h>
28 
29 #include <mgmapi.h>
30 #include <NdbMain.h>
31 #include <NdbOut.hpp>
32 #include <NdbSleep.h>
33 #include <NdbTick.h>
34 
35 #include <NDBT.hpp>
36 
37 #include <kernel/NodeBitmask.hpp>
38 
/* Forward declaration; the definition follows main() further down. */
static int
waitClusterStatus(const char* _addr, ndb_mgm_node_status _status);

/*
 * Command-line option values. Each variable is registered as the value
 * target of one entry in my_long_options below.
 */
static int _no_contact = 0;    /* --no-contact / -n */
static int _not_started = 0;   /* --not-started */
static int _single_user = 0;   /* --single-user */
static int _timeout = 120;     /* --timeout / -t, in seconds */
static const char* _wait_nodes = 0;    /* --wait-nodes / -w, raw option text */
static const char* _nowait_nodes = 0;  /* --nowait-nodes, raw option text */
/* Data nodes whose bit is set here are left out of ndbNodes by getStatus(). */
static NdbNodeBitmask nowait_nodes_bitmask;

/* Defaults-file groups handed to load_defaults() and ndb_usage(). */
const char *load_default_groups[]= { "mysql_cluster",0 };
51 
/*
 * Option table passed to handle_options() and ndb_usage().
 * Starts with the standard NDB options (NDB_STD_OPTS) and ends with an
 * all-zero terminator entry. Every tool-specific entry stores its value
 * directly in one of the file-scope option variables declared above.
 */
static struct my_option my_long_options[] =
{
  NDB_STD_OPTS("ndb_waiter"),
  /* Boolean flags selecting which node status to wait for (see main()). */
  { "no-contact", 'n', "Wait for cluster no contact",
    (uchar**) &_no_contact, (uchar**) &_no_contact, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
  { "not-started", NDB_OPT_NOSHORT, "Wait for cluster not started",
    (uchar**) &_not_started, (uchar**) &_not_started, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
  { "single-user", NDB_OPT_NOSHORT,
    "Wait for cluster to enter single user mode",
    (uchar**) &_single_user, (uchar**) &_single_user, 0,
    GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
  /* Integer option with default value 120. */
  { "timeout", 't', "Timeout to wait in seconds",
    (uchar**) &_timeout, (uchar**) &_timeout, 0,
    GET_INT, REQUIRED_ARG, 120, 0, 0, 0, 0, 0 },
  /* Node lists; main() parses them with parse_mask() and rejects both at once. */
  { "wait-nodes", 'w', "Node ids to wait on, e.g. '1,2-4'",
    (uchar**) &_wait_nodes, (uchar**) &_wait_nodes, 0,
    GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
  { "nowait-nodes", NDB_OPT_NOSHORT,
    "Nodes that will not be waited for, e.g. '2,3,4-7'",
    (uchar**) &_nowait_nodes, (uchar**) &_nowait_nodes, 0,
    GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
  /* Terminator entry. */
  { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
};
77 
/*
 * Short-usage callback registered through ndb_opt_set_usage_funcs() in
 * main(); forwards to ndb_short_usage_sub() with a NULL argument.
 */
static void short_usage_sub(void)
{
  ndb_short_usage_sub(NULL);
}
82 
/*
 * Full-usage callback registered through ndb_opt_set_usage_funcs() in
 * main(); hands this tool's short-usage callback, defaults groups and
 * option table to ndb_usage().
 */
static void usage()
{
  ndb_usage(short_usage_sub, load_default_groups, my_long_options);
}
87 
/*
 * Deliberately empty signal handler. main() installs it for SIGUSR1 on
 * non-Windows builds so that the signal can be delivered (to test EINTR
 * handling) without any action being taken. signum is unused.
 */
extern "C"
void catch_signal(int signum)
{
}
92 
93 #include "../src/common/util/parse_mask.hpp"
94 
main(int argc,char ** argv)95 int main(int argc, char** argv){
96   NDB_INIT(argv[0]);
97   ndb_opt_set_usage_funcs(short_usage_sub, usage);
98   load_defaults("my",load_default_groups,&argc,&argv);
99 
100 #ifndef DBUG_OFF
101   opt_debug= "d:t:O,/tmp/ndb_waiter.trace";
102 #endif
103 
104 #ifndef _WIN32
105   // Catching signal to allow testing of EINTR safeness
106   // with "while killall -USR1 ndbwaiter; do true; done"
107   signal(SIGUSR1, catch_signal);
108 #endif
109 
110   if (handle_options(&argc, &argv, my_long_options,
111                      ndb_std_get_one_option))
112     return NDBT_ProgramExit(NDBT_WRONGARGS);
113 
114   const char* connect_string = argv[0];
115   if (connect_string == 0)
116     connect_string = opt_ndb_connectstring;
117 
118   enum ndb_mgm_node_status wait_status;
119   if (_no_contact)
120   {
121     wait_status= NDB_MGM_NODE_STATUS_NO_CONTACT;
122   }
123   else if (_not_started)
124   {
125     wait_status= NDB_MGM_NODE_STATUS_NOT_STARTED;
126   }
127   else if (_single_user)
128   {
129     wait_status= NDB_MGM_NODE_STATUS_SINGLEUSER;
130   }
131   else
132   {
133     wait_status= NDB_MGM_NODE_STATUS_STARTED;
134   }
135 
136   if (_nowait_nodes)
137   {
138     int res = parse_mask(_nowait_nodes, nowait_nodes_bitmask);
139     if(res == -2 || (res > 0 && nowait_nodes_bitmask.get(0)))
140     {
141       ndbout_c("Invalid nodeid specified in nowait-nodes: %s",
142                _nowait_nodes);
143       exit(-1);
144     }
145     else if (res < 0)
146     {
147       ndbout_c("Unable to parse nowait-nodes argument: %s",
148                _nowait_nodes);
149       exit(-1);
150     }
151   }
152 
153   if (_wait_nodes)
154   {
155     if (_nowait_nodes)
156     {
157       ndbout_c("Can not set both wait-nodes and nowait-nodes.");
158       exit(-1);
159     }
160 
161     int res = parse_mask(_wait_nodes, nowait_nodes_bitmask);
162     if (res == -2 || (res > 0 && nowait_nodes_bitmask.get(0)))
163     {
164       ndbout_c("Invalid nodeid specified in wait-nodes: %s",
165                _wait_nodes);
166       exit(-1);
167     }
168     else if (res < 0)
169     {
170       ndbout_c("Unable to parse wait-nodes argument: %s",
171                _wait_nodes);
172       exit(-1);
173     }
174 
175     // Don't wait for any other nodes than the ones we have set explicitly
176     nowait_nodes_bitmask.bitNOT();
177   }
178 
179   if (waitClusterStatus(connect_string, wait_status) != 0)
180     return NDBT_ProgramExit(NDBT_FAILED);
181   return NDBT_ProgramExit(NDBT_OK);
182 }
183 
/*
 * Print the values of ndb_mgm_get_latest_error(h) and
 * ndb_mgm_get_latest_error_line(h) for management handle h to ndbout.
 *
 * The body is wrapped in do { } while (0) and has no trailing semicolon,
 * so that "MGMERR(h);" expands to exactly one statement and stays correct
 * inside an unbraced if/else.
 */
#define MGMERR(h) \
  do { \
    ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \
	   << ", line="<<ndb_mgm_get_latest_error_line(h) \
	   << endl; \
  } while (0)
188 
/* Management server handle; created in waitClusterStatus(), used by getStatus(). */
NdbMgmHandle handle= NULL;

/*
 * Node states collected by the most recent getStatus() call: the NDB
 * (data) nodes whose id is not set in nowait_nodes_bitmask.
 */
Vector<ndb_mgm_node_state> ndbNodes;
192 
193 int
getStatus()194 getStatus(){
195   int retries = 0;
196   struct ndb_mgm_cluster_state * status;
197   struct ndb_mgm_node_state * node;
198 
199   ndbNodes.clear();
200 
201   while(retries < 10){
202     status = ndb_mgm_get_status(handle);
203     if (status == NULL){
204       ndbout << "status==NULL, retries="<<retries<<endl;
205       MGMERR(handle);
206       retries++;
207       ndb_mgm_disconnect(handle);
208       if (ndb_mgm_connect(handle,0,0,1)) {
209         MGMERR(handle);
210         g_err  << "Reconnect failed" << endl;
211         break;
212       }
213       continue;
214     }
215     int count = status->no_of_nodes;
216     for (int i = 0; i < count; i++){
217       node = &status->node_states[i];
218       switch(node->node_type){
219       case NDB_MGM_NODE_TYPE_NDB:
220         if (!nowait_nodes_bitmask.get(node->node_id))
221           ndbNodes.push_back(*node);
222 	break;
223       case NDB_MGM_NODE_TYPE_MGM:
224         /* Don't care about MGM nodes */
225 	break;
226       case NDB_MGM_NODE_TYPE_API:
227         /* Don't care about API nodes */
228 	break;
229       default:
230 	if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
231 	   node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
232 	  retries++;
233 	  ndbNodes.clear();
234 	  free(status);
235 	  status = NULL;
236           count = 0;
237 
238 	  ndbout << "kalle"<< endl;
239 	  break;
240 	}
241 	abort();
242 	break;
243       }
244     }
245     if(status == 0){
246       ndbout << "status == 0" << endl;
247       continue;
248     }
249     free(status);
250     return 0;
251   }
252 
253   return -1;
254 }
255 
256 char*
getTimeAsString(char * pStr)257 getTimeAsString(char* pStr)
258 {
259   time_t now;
260   now= ::time((time_t*)NULL);
261 
262   struct tm* tm_now;
263 #ifdef NDB_WIN32
264   tm_now = localtime(&now);
265 #else
266   tm_now = ::localtime(&now); //uses the "current" timezone
267 #endif
268 
269   BaseString::snprintf(pStr, 9,
270 	   "%02d:%02d:%02d",
271 	   tm_now->tm_hour,
272 	   tm_now->tm_min,
273 	   tm_now->tm_sec);
274 
275   return pStr;
276 }
277 
278 static int
waitClusterStatus(const char * _addr,ndb_mgm_node_status _status)279 waitClusterStatus(const char* _addr,
280 		  ndb_mgm_node_status _status)
281 {
282   int _startphase = -1;
283 
284 #ifndef NDB_WIN
285   /* Ignore SIGPIPE */
286   signal(SIGPIPE, SIG_IGN);
287 #endif
288 
289   handle = ndb_mgm_create_handle();
290   if (handle == NULL){
291     g_err << "Could not create ndb_mgm handle" << endl;
292     return -1;
293   }
294   g_info << "Connecting to mgmsrv at " << _addr << endl;
295   if (ndb_mgm_set_connectstring(handle, _addr))
296   {
297     MGMERR(handle);
298     g_err  << "Connectstring " << _addr << " invalid" << endl;
299     return -1;
300   }
301   if (ndb_mgm_connect(handle,0,0,1)) {
302     MGMERR(handle);
303     g_err  << "Connection to " << _addr << " failed" << endl;
304     return -1;
305   }
306 
307   int attempts = 0;
308   int resetAttempts = 0;
309   const int MAX_RESET_ATTEMPTS = 10;
310   bool allInState = false;
311 
312   Uint64 time_now = NdbTick_CurrentMillisecond();
313   Uint64 timeout_time = time_now + 1000 * _timeout;
314 
315   while (allInState == false){
316     if (_timeout > 0 && time_now > timeout_time){
317       /**
318        * Timeout has expired waiting for the nodes to enter
319        * the state we want
320        */
321       bool waitMore = false;
322       /**
323        * Make special check if we are waiting for
324        * cluster to become started
325        */
326       if(_status == NDB_MGM_NODE_STATUS_STARTED){
327 	waitMore = true;
328 	/**
329 	 * First check if any node is not starting
330 	 * then it's no idea to wait anymore
331 	 */
332 	for (size_t n = 0; n < ndbNodes.size(); n++){
333 	  if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
334 	      ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
335 	    waitMore = false;
336 
337 	}
338       }
339 
340       if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
341 	g_err << "waitNodeState("
342 	      << ndb_mgm_get_node_status_string(_status)
343 	      <<", "<<_startphase<<")"
344 	      << " timeout after " << attempts << " attempts" << endl;
345 	return -1;
346       }
347 
348       g_err << "waitNodeState("
349 	    << ndb_mgm_get_node_status_string(_status)
350 	    <<", "<<_startphase<<")"
351 	    << " resetting timeout "
352 	    << resetAttempts << endl;
353 
354       timeout_time = time_now + 1000 * _timeout;
355 
356       resetAttempts++;
357     }
358 
359     if (attempts > 0)
360       NdbSleep_MilliSleep(100);
361     if (getStatus() != 0){
362       return -1;
363     }
364 
365     /* Assume all nodes are in state(if there is any) */
366     allInState = (ndbNodes.size() > 0);
367 
368     /* Loop through all nodes and check their state */
369     for (size_t n = 0; n < ndbNodes.size(); n++) {
370       ndb_mgm_node_state* ndbNode = &ndbNodes[n];
371 
372       assert(ndbNode != NULL);
373 
374       g_info << "Node " << ndbNode->node_id << ": "
375 	     << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;
376 
377       if (ndbNode->node_status !=  _status)
378 	  allInState = false;
379     }
380 
381     if (!allInState) {
382       char time[9];
383       g_info << "[" << getTimeAsString(time) << "] "
384              << "Waiting for cluster enter state "
385              << ndb_mgm_get_node_status_string(_status) << endl;
386     }
387 
388     attempts++;
389 
390     time_now = NdbTick_CurrentMillisecond();
391   }
392   return 0;
393 }
394 
/* Explicit instantiation of Vector for ndb_mgm_node_state, the element type of ndbNodes. */
template class Vector<ndb_mgm_node_state>;
396