1 /*
2    Copyright (c) 2004, 2014, Oracle and/or its affiliates. All rights reserved.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
23 */
24 
25 #include <ndb_global.h>
26 #include <ndb_opts.h>
27 #include <time.h>
28 
29 #include <mgmapi.h>
30 #include <NdbMain.h>
31 #include <NdbOut.hpp>
32 #include <NdbSleep.h>
33 #include <NdbTick.h>
34 #include <portlib/ndb_localtime.h>
35 
36 #include <NDBT.hpp>
37 
38 #include <kernel/NodeBitmask.hpp>
39 
40 static int
41 waitClusterStatus(const char* _addr, ndb_mgm_node_status _status);
42 
43 static int _no_contact = 0;
44 static int _not_started = 0;
45 static int _single_user = 0;
46 static int _timeout = 120; // Seconds
47 static const char* _wait_nodes = 0;
48 static const char* _nowait_nodes = 0;
49 static NdbNodeBitmask nowait_nodes_bitmask;
50 
/* NULL-terminated list of option groups passed to ndb_load_defaults(). */
const char* load_default_groups[] = {
  "mysql_cluster",
  0
};
52 
53 static struct my_option my_long_options[] =
54 {
55   NDB_STD_OPTS("ndb_waiter"),
56   { "no-contact", 'n', "Wait for cluster no contact",
57     (uchar**) &_no_contact, (uchar**) &_no_contact, 0,
58     GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
59   { "not-started", NDB_OPT_NOSHORT, "Wait for cluster not started",
60     (uchar**) &_not_started, (uchar**) &_not_started, 0,
61     GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
62   { "single-user", NDB_OPT_NOSHORT,
63     "Wait for cluster to enter single user mode",
64     (uchar**) &_single_user, (uchar**) &_single_user, 0,
65     GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0 },
66   { "timeout", 't', "Timeout to wait in seconds",
67     (uchar**) &_timeout, (uchar**) &_timeout, 0,
68     GET_INT, REQUIRED_ARG, 120, 0, 0, 0, 0, 0 },
69   { "wait-nodes", 'w', "Node ids to wait on, e.g. '1,2-4'",
70     (uchar**) &_wait_nodes, (uchar**) &_wait_nodes, 0,
71     GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
72   { "nowait-nodes", NDB_OPT_NOSHORT,
73     "Nodes that will not be waited for, e.g. '2,3,4-7'",
74     (uchar**) &_nowait_nodes, (uchar**) &_nowait_nodes, 0,
75     GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
76   { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}
77 };
78 
short_usage_sub(void)79 static void short_usage_sub(void)
80 {
81   ndb_short_usage_sub(NULL);
82 }
83 
usage()84 static void usage()
85 {
86   ndb_usage(short_usage_sub, load_default_groups, my_long_options);
87 }
88 
/**
 * Signal handler that deliberately does nothing. main() installs it for
 * SIGUSR1 so that the signal can be delivered to the process (used for
 * testing EINTR safeness).
 */
extern "C"
void catch_signal(int signum)
{
  (void) signum; // the signal number is not needed
}
93 
94 #include "../src/common/util/parse_mask.hpp"
95 
main(int argc,char ** argv)96 int main(int argc, char** argv){
97   NDB_INIT(argv[0]);
98   ndb_opt_set_usage_funcs(short_usage_sub, usage);
99   ndb_load_defaults(NULL,load_default_groups,&argc,&argv);
100 
101 #ifndef DBUG_OFF
102   opt_debug= "d:t:O,/tmp/ndb_waiter.trace";
103 #endif
104 
105 #ifndef _WIN32
106   // Catching signal to allow testing of EINTR safeness
107   // with "while killall -USR1 ndbwaiter; do true; done"
108   signal(SIGUSR1, catch_signal);
109 #endif
110 
111   if (handle_options(&argc, &argv, my_long_options,
112                      ndb_std_get_one_option))
113     return NDBT_ProgramExit(NDBT_WRONGARGS);
114 
115   const char* connect_string = argv[0];
116   if (connect_string == 0)
117     connect_string = opt_ndb_connectstring;
118 
119   enum ndb_mgm_node_status wait_status;
120   if (_no_contact)
121   {
122     wait_status= NDB_MGM_NODE_STATUS_NO_CONTACT;
123   }
124   else if (_not_started)
125   {
126     wait_status= NDB_MGM_NODE_STATUS_NOT_STARTED;
127   }
128   else if (_single_user)
129   {
130     wait_status= NDB_MGM_NODE_STATUS_SINGLEUSER;
131   }
132   else
133   {
134     wait_status= NDB_MGM_NODE_STATUS_STARTED;
135   }
136 
137   if (_nowait_nodes)
138   {
139     int res = parse_mask(_nowait_nodes, nowait_nodes_bitmask);
140     if(res == -2 || (res > 0 && nowait_nodes_bitmask.get(0)))
141     {
142       ndbout_c("Invalid nodeid specified in nowait-nodes: %s",
143                _nowait_nodes);
144       exit(-1);
145     }
146     else if (res < 0)
147     {
148       ndbout_c("Unable to parse nowait-nodes argument: %s",
149                _nowait_nodes);
150       exit(-1);
151     }
152   }
153 
154   if (_wait_nodes)
155   {
156     if (_nowait_nodes)
157     {
158       ndbout_c("Can not set both wait-nodes and nowait-nodes.");
159       exit(-1);
160     }
161 
162     int res = parse_mask(_wait_nodes, nowait_nodes_bitmask);
163     if (res == -2 || (res > 0 && nowait_nodes_bitmask.get(0)))
164     {
165       ndbout_c("Invalid nodeid specified in wait-nodes: %s",
166                _wait_nodes);
167       exit(-1);
168     }
169     else if (res < 0)
170     {
171       ndbout_c("Unable to parse wait-nodes argument: %s",
172                _wait_nodes);
173       exit(-1);
174     }
175 
176     // Don't wait for any other nodes than the ones we have set explicitly
177     nowait_nodes_bitmask.bitNOT();
178   }
179 
180   if (waitClusterStatus(connect_string, wait_status) != 0)
181     return NDBT_ProgramExit(NDBT_FAILED);
182   return NDBT_ProgramExit(NDBT_OK);
183 }
184 
/**
 * Print the latest error of management handle h, together with the line
 * reported by ndb_mgm_get_latest_error_line(), to ndbout.
 *
 * The body is wrapped in do { } while (0) and carries no trailing
 * semicolon, so "MGMERR(h);" expands to exactly one statement and stays
 * safe inside an unbraced if/else.
 */
#define MGMERR(h) \
  do { \
    ndbout << "latest_error="<<ndb_mgm_get_latest_error(h) \
           << ", line="<<ndb_mgm_get_latest_error_line(h) \
           << endl; \
  } while (0)
189 
190 NdbMgmHandle handle= NULL;
191 
192 Vector<ndb_mgm_node_state> ndbNodes;
193 
194 int
getStatus()195 getStatus(){
196   int retries = 0;
197   struct ndb_mgm_cluster_state * status;
198   struct ndb_mgm_node_state * node;
199 
200   ndbNodes.clear();
201 
202   while(retries < 10){
203     status = ndb_mgm_get_status(handle);
204     if (status == NULL){
205       ndbout << "status==NULL, retries="<<retries<<endl;
206       MGMERR(handle);
207       retries++;
208       ndb_mgm_disconnect(handle);
209       if (ndb_mgm_connect(handle,0,0,1)) {
210         MGMERR(handle);
211         g_err  << "Reconnect failed" << endl;
212         break;
213       }
214       continue;
215     }
216     int count = status->no_of_nodes;
217     for (int i = 0; i < count; i++){
218       node = &status->node_states[i];
219       switch(node->node_type){
220       case NDB_MGM_NODE_TYPE_NDB:
221         if (!nowait_nodes_bitmask.get(node->node_id))
222           ndbNodes.push_back(*node);
223 	break;
224       case NDB_MGM_NODE_TYPE_MGM:
225         /* Don't care about MGM nodes */
226 	break;
227       case NDB_MGM_NODE_TYPE_API:
228         /* Don't care about API nodes */
229 	break;
230       default:
231 	if(node->node_status == NDB_MGM_NODE_STATUS_UNKNOWN ||
232 	   node->node_status == NDB_MGM_NODE_STATUS_NO_CONTACT){
233 	  retries++;
234 	  ndbNodes.clear();
235 	  free(status);
236 	  status = NULL;
237           count = 0;
238 
239 	  ndbout << "kalle"<< endl;
240 	  break;
241 	}
242 	abort();
243 	break;
244       }
245     }
246     if(status == 0){
247       ndbout << "status == 0" << endl;
248       continue;
249     }
250     free(status);
251     return 0;
252   }
253 
254   return -1;
255 }
256 
257 static
258 char*
getTimeAsString(char * pStr,size_t len)259 getTimeAsString(char* pStr, size_t len)
260 {
261   // Get current time
262   time_t now;
263   time(&now);
264 
265   // Convert to local timezone
266   tm tm_buf;
267   ndb_localtime_r(&now, &tm_buf);
268 
269   // Print to string buffer
270   BaseString::snprintf(pStr, len,
271                        "%02d:%02d:%02d",
272                        tm_buf.tm_hour,
273                        tm_buf.tm_min,
274                        tm_buf.tm_sec);
275   return pStr;
276 }
277 
278 static int
waitClusterStatus(const char * _addr,ndb_mgm_node_status _status)279 waitClusterStatus(const char* _addr,
280 		  ndb_mgm_node_status _status)
281 {
282   int _startphase = -1;
283 
284 #ifndef NDB_WIN
285   /* Ignore SIGPIPE */
286   signal(SIGPIPE, SIG_IGN);
287 #endif
288 
289   handle = ndb_mgm_create_handle();
290   if (handle == NULL){
291     g_err << "Could not create ndb_mgm handle" << endl;
292     return -1;
293   }
294   g_info << "Connecting to mgmsrv at " << _addr << endl;
295   if (ndb_mgm_set_connectstring(handle, _addr))
296   {
297     MGMERR(handle);
298     g_err  << "Connectstring " << _addr << " invalid" << endl;
299     return -1;
300   }
301   if (ndb_mgm_connect(handle,0,0,1)) {
302     MGMERR(handle);
303     g_err  << "Connection to " << _addr << " failed" << endl;
304     return -1;
305   }
306 
307   int attempts = 0;
308   int resetAttempts = 0;
309   const int MAX_RESET_ATTEMPTS = 10;
310   bool allInState = false;
311 
312   NDB_TICKS start = NdbTick_getCurrentTicks();
313   NDB_TICKS now = start;
314 
315   while (allInState == false){
316     if (_timeout > 0 &&
317         NdbTick_Elapsed(start,now).seconds() > (Uint64)_timeout){
318       /**
319        * Timeout has expired waiting for the nodes to enter
320        * the state we want
321        */
322       bool waitMore = false;
323       /**
324        * Make special check if we are waiting for
325        * cluster to become started
326        */
327       if(_status == NDB_MGM_NODE_STATUS_STARTED)
328       {
329         waitMore = true;
330         /**
331          * First check if any node is not starting
332          * then it's no idea to wait anymore
333          */
334         for (unsigned n = 0; n < ndbNodes.size(); n++)
335         {
336           if (ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTED &&
337               ndbNodes[n].node_status != NDB_MGM_NODE_STATUS_STARTING)
338           {
339             waitMore = false;
340             break;
341           }
342         }
343       }
344 
345       if (!waitMore || resetAttempts > MAX_RESET_ATTEMPTS){
346 	g_err << "waitNodeState("
347 	      << ndb_mgm_get_node_status_string(_status)
348 	      <<", "<<_startphase<<")"
349 	      << " timeout after " << attempts << " attempts" << endl;
350 	return -1;
351       }
352 
353       g_err << "waitNodeState("
354 	    << ndb_mgm_get_node_status_string(_status)
355 	    <<", "<<_startphase<<")"
356 	    << " resetting timeout "
357 	    << resetAttempts << endl;
358 
359       start = now;
360 
361       resetAttempts++;
362     }
363 
364     if (attempts > 0)
365       NdbSleep_MilliSleep(100);
366     if (getStatus() != 0){
367       return -1;
368     }
369 
370     /* Assume all nodes are in state(if there is any) */
371     allInState = (ndbNodes.size() > 0);
372 
373     /* Loop through all nodes and check their state */
374     for (unsigned n = 0; n < ndbNodes.size(); n++) {
375       ndb_mgm_node_state* ndbNode = &ndbNodes[n];
376 
377       require(ndbNode != NULL);
378 
379       g_info << "Node " << ndbNode->node_id << ": "
380 	     << ndb_mgm_get_node_status_string(ndbNode->node_status)<< endl;
381 
382       if (ndbNode->node_status !=  _status)
383 	  allInState = false;
384     }
385 
386     if (!allInState) {
387       char timestamp[9];
388       g_info << "[" << getTimeAsString(timestamp, sizeof(timestamp)) << "] "
389              << "Waiting for cluster enter state "
390              << ndb_mgm_get_node_status_string(_status) << endl;
391     }
392 
393     attempts++;
394 
395     now = NdbTick_getCurrentTicks();
396   }
397   return 0;
398 }
399 
400 template class Vector<ndb_mgm_node_state>;
401