1%%--------------------------------------------------------------------
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2010-2018. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19
20%%----------------------------------------------------------------------
21%% File    : ct_slave.erl
22%% Description : CT module for starting nodes for large-scale testing.
23%%
24%% Created : 7 April 2010
25%%----------------------------------------------------------------------
26-module(ct_slave).
27
28-export([start/1, start/2, start/3, stop/1, stop/2]).
29
30-export([slave_started/2, slave_ready/2, monitor_master/1]).
31
32-record(options, {username, password, boot_timeout, init_timeout,
33		  startup_timeout, startup_functions, monitor_master,
34		  kill_if_fail, erl_flags, env, ssh_port, ssh_opts,
35		  stop_timeout}).
36
%% Starts the node Node with default options, using the host name
%% returned by gethostname/0 as the target host.
start(Node) ->
    LocalHost = gethostname(),
    start(LocalHost, Node).
39
%% Two-argument start. If the second argument is a list it is treated as
%% the option list and the first argument as the node name, with the host
%% taken from gethostname/0. Otherwise the arguments are Host and Node and
%% an empty option list is used.
start(_HostOrNode = Node, _NodeOrOpts = Opts) %% match to satiate edoc
  when is_list(Opts) ->
    LocalHost = gethostname(),
    start(LocalHost, Node, Opts);

start(Host, Node) ->
    NoOptions = [],
    start(Host, Node, NoOptions).
46
%% Starts Node on Host with the options Opts.
%% Returns {error, not_alive, node()} when the calling node is not
%% distributed, {error, started_not_connected, ENode} or
%% {error, already_started, ENode} when the target node already answers,
%% and otherwise whatever do_start/3 returns.
start(Host, Node, Opts) ->
    ENode = enodename(Host, Node),
    case erlang:is_alive() of
        true ->
            start_by_state(is_started(ENode), Host, Node, ENode, Opts);
        false ->
            {error, not_alive, node()}
    end.

% dispatch on the result of is_started/1 for the target node
start_by_state(false, Host, Node, _ENode, Opts) ->
    do_start(Host, Node, fetch_options(Opts));
start_by_state({true, not_connected}, _Host, _Node, ENode, _Opts) ->
    {error, started_not_connected, ENode};
start_by_state({true, connected}, _Host, _Node, ENode, _Opts) ->
    {error, already_started, ENode}.
63
%% Stops the node Node, using the host name returned by gethostname/0
%% as the target host.
stop(Node) ->
    LocalHost = gethostname(),
    stop(LocalHost, Node).
66
%% Two-argument stop. If the second argument is a list it is treated as
%% the option list and the first argument as the node name, with the host
%% taken from gethostname/0. Otherwise the arguments are Host and Node and
%% an empty option list is used.
stop(_HostOrNode = Node, _NodeOrOpts = Opts) %% match to satiate edoc
  when is_list(Opts) ->
    LocalHost = gethostname(),
    stop(LocalHost, Node, Opts);

stop(Host, Node) ->
    NoOptions = [],
    stop(Host, Node, NoOptions).
73
% Stops Node on Host with the options Opts.
% Only a node that is currently connected is stopped (via do_stop/2);
% otherwise {error, not_connected, ENode} or {error, not_started, ENode}
% is returned.
stop(Host, Node, Opts) ->
    ENode = enodename(Host, Node),
    stop_by_state(is_started(ENode), ENode, Opts).

% dispatch on the result of is_started/1 for the target node
stop_by_state({true, connected}, ENode, Opts) ->
    do_stop(ENode, fetch_options(Opts));
stop_by_state({true, not_connected}, ENode, _Opts) ->
    {error, not_connected, ENode};
stop_by_state(false, ENode, _Opts) ->
    {error, not_started, ENode}.
85
%%% Look up Key in a list of {Key, Value} tuples; the value of the first
%%% matching tuple is returned, or Default when no tuple has that key.
get_option_value(Key, OptionList, Default) ->
    Found = lists:keyfind(Key, 1, OptionList),
    case Found of
        {Key, Value} ->
            Value;
        false ->
            Default
    end.
94
%%% Build an #options{} record from the user-supplied option list.
%%% Every field missing from the list gets its default: timeouts are
%%% 3 (boot), 1 (init), 1 (startup) and 5 (stop); kill_if_fail is true;
%%% monitor_master is false; all remaining fields default to [].
fetch_options(Options) ->
    Get = fun(Key, Default) -> get_option_value(Key, Options, Default) end,
    #options{username = Get(username, []),
             password = Get(password, []),
             boot_timeout = Get(boot_timeout, 3),
             init_timeout = Get(init_timeout, 1),
             startup_timeout = Get(startup_timeout, 1),
             startup_functions = Get(startup_functions, []),
             monitor_master = Get(monitor_master, false),
             kill_if_fail = Get(kill_if_fail, true),
             erl_flags = Get(erl_flags, []),
             env = Get(env, []),
             ssh_port = Get(ssh_port, []),
             ssh_opts = Get(ssh_opts, []),
             stop_timeout = Get(stop_timeout, 5)}.
116
% Notifies MasterPid that the node ENode has started by sending it
% {node_started, ENode}. Always returns ok.
slave_started(ENode, MasterPid) ->
    Notification = {node_started, ENode},
    erlang:send(MasterPid, Notification),
    ok.
121
% Notifies MasterPid that the node ENode has finished its startup by
% sending it {node_ready, ENode}. Always returns ok.
slave_ready(ENode, MasterPid) ->
    Notification = {node_ready, ENode},
    erlang:send(MasterPid, Notification),
    ok.
126
% Spawns a process that watches MasterNode (see monitor_master_int/1)
% and returns its pid.
monitor_master(MasterNode) ->
    Watcher = fun() -> monitor_master_int(MasterNode) end,
    spawn(Watcher).
130
% Body of the process that waits for the master node to go down:
% it subscribes to node monitoring for MasterNode and calls init:stop()
% on the local node once {nodedown, MasterNode} arrives.
monitor_master_int(MasterNode) ->
    ct_util:mark_process(),
    monitor_node(MasterNode, true),
    receive
        {nodedown, MasterNode} -> init:stop()
    end.
139
% Returns true when ENode appears in nodes(), false otherwise.
is_connected(ENode) ->
    lists:member(ENode, nodes()).
143
% Reports whether ENode is running.
% Returns {true, connected} if it is already listed in nodes(),
% {true, not_connected} if it was not listed but answered a ping
% (the connection created by the ping is dropped again), and false
% if it did not answer.
is_started(ENode) ->
    case is_connected(ENode) of
        true -> {true, connected};
        false -> probe_unconnected(ENode)
    end.

% ping a node we are not connected to; disconnect again if it answers
probe_unconnected(ENode) ->
    case net_adm:ping(ENode) of
        pong ->
            erlang:disconnect_node(ENode),
            {true, not_connected};
        pang ->
            false
    end.
158
% Builds an Erlang node name (an atom) from a node name and a host name.
% If Node already contains '@' it is returned unchanged and Host is
% ignored; otherwise the result is 'Node@Host'.
enodename(Host, Node) ->
    NodeStr = atom_to_list(Node),
    case lists:member($@, NodeStr) of
        false ->
            list_to_atom(NodeStr ++ "@" ++ atom_to_list(Host));
        true ->
            Node
    end.
167
% Performs the actual start of the "slave" node.
% Steps, in order:
%   1. build the list of functions to run on the new node: slave_started,
%      the user's startup_functions, slave_ready, preceded by
%      monitor_master when that option is true;
%   2. spawn the node locally when Host equals gethostname(), otherwise
%      remotely;
%   3. wait for the node to answer pings, start cover on it when
%      test_server:is_cover() is true, run the functions and wait for the
%      node_started / node_ready messages.
% Returns {ok, ENode} or {error, boot_timeout | init_timeout |
% startup_timeout, ENode}. On init_timeout or startup_timeout the node is
% stopped again if the kill_if_fail option is true.
do_start(Host, Node, Options) ->
    ENode = enodename(Host, Node),
    Master = self(),
    Handshake = [{ct_slave, slave_started, [ENode, Master]}]
        ++ Options#options.startup_functions
        ++ [{ct_slave, slave_ready, [ENode, Master]}],
    ToCall = case Options#options.monitor_master of
                 true -> [{ct_slave, monitor_master, [node()]} | Handshake];
                 _ -> Handshake
             end,
    _ = case gethostname() of
            Host -> spawn_local_node(Node, Options);
            _ -> spawn_remote_node(Host, Node, Options)
        end,

    BootTimeout = Options#options.boot_timeout,
    InitTimeout = Options#options.init_timeout,
    StartupTimeout = Options#options.startup_timeout,
    Result = case wait_for_node_alive(ENode, BootTimeout) of
                 pang ->
                     {error, boot_timeout, ENode};
                 pong ->
                     start_cover_if_enabled(ENode),
                     call_functions(ENode, ToCall),
                     await_handshake(ENode, InitTimeout, StartupTimeout)
             end,
    _ = kill_failed_node(Result, Options#options.kill_if_fail),
    Result.

% start cover on ENode via the main cover node when cover is in use
start_cover_if_enabled(ENode) ->
    case test_server:is_cover() of
        false ->
            ok;
        true ->
            MainCoverNode = cover:get_main_node(),
            rpc:call(MainCoverNode, cover, start, [ENode])
    end.

% wait for node_started (InitTimeout seconds) and then for node_ready
% (StartupTimeout seconds) from ENode
await_handshake(ENode, InitTimeout, StartupTimeout) ->
    receive
        {node_started, ENode} ->
            receive
                {node_ready, ENode} ->
                    {ok, ENode}
            after StartupTimeout * 1000 ->
                {error, startup_timeout, ENode}
            end
    after InitTimeout * 1000 ->
        {error, init_timeout, ENode}
    end.

% stop a node whose startup timed out, but only when kill_if_fail is true
kill_failed_node({error, init_timeout, ENode}, true) ->
    do_stop(ENode);
kill_failed_node({error, startup_timeout, ENode}, true) ->
    do_stop(ENode);
kill_failed_node(_Result, _KillIfFail) ->
    ok.
226
% Returns the erl command-line flag matching the naming mode reported by
% net_kernel:longnames(): " -name " for long names, " -sname " for short.
long_or_short() ->
    UsesLongNames = net_kernel:longnames(),
    case UsesLongNames of
        false -> " -sname ";
        true -> " -name "
    end.
235
% Returns the local host name as an atom. With long names the value of
% net_adm:localhost() is used; in every other case the value of
% inet:gethostname() is used.
gethostname() ->
    list_to_atom(local_hostname(net_kernel:longnames())).

% pick the host name source depending on the naming mode
local_hostname(true) ->
    net_adm:localhost();
local_hostname(_) ->
    {ok, Name} = inet:gethostname(),
    Name.
246
% Builds the command line that starts a detached Erlang node named Node:
% it carries the current node's cookie, the naming flag from
% long_or_short/0 and the extra flags string Flags appended at the end.
get_cmd(Node, Flags) ->
    CookieStr = atom_to_list(erlang:get_cookie()),
    NameFlag = long_or_short(),
    NodeStr = atom_to_list(Node),
    "erl -detached -noinput -setcookie " ++ CookieStr ++
        NameFlag ++ NodeStr ++ " " ++ Flags.
252
% Starts the node on the local machine by opening a port that runs the
% command from get_cmd/2, with the env option passed as the port's
% environment. Returns the port.
spawn_local_node(Node, Options) ->
    #options{erl_flags = Flags, env = EnvVars} = Options,
    Command = get_cmd(Node, Flags),
    PortSettings = [stream, {env, EnvVars}],
    open_port({spawn, Command}, PortSettings).
258
% Starts the node on a remote host over SSH.
% Connects to Host (port 22 unless ssh_port is set), opens a session
% channel, exports the env option's variables on it and executes the
% command from get_cmd/2. The connect options are the credentials (if
% any), {silently_accept_hosts, true} and then the user's ssh_opts.
spawn_remote_node(Host, Node, Options) ->
    #options{username = User,
             password = Pass,
             erl_flags = Flags,
             env = EnvVars,
             ssh_port = PortOpt,
             ssh_opts = ExtraOpts} = Options,
    Port = if
               PortOpt =:= [] -> 22; % Use default SSH port
               true -> PortOpt
           end,
    ConnectOpts = ssh_credentials(User, Pass)
        ++ [{silently_accept_hosts, true}]
        ++ ExtraOpts,
    {ok, _} = application:ensure_all_started(ssh),
    {ok, Conn} = ssh:connect(atom_to_list(Host), Port, ConnectOpts),
    {ok, Channel} = ssh_connection:session_channel(Conn, infinity),
    ssh_setenv(Conn, Channel, EnvVars),
    Command = get_cmd(Node, Flags),
    ssh_connection:exec(Conn, Channel, Command, infinity).

% credentials part of the SSH connect options: nothing when neither user
% nor password is set, user only when the password is unset, both otherwise
ssh_credentials([], []) ->
    [];
ssh_credentials(User, []) ->
    [{user, User}];
ssh_credentials(User, Pass) ->
    [{user, User}, {password, Pass}].
284
% Sets each {Var, Value} pair (both must be lists) as an environment
% variable on the given SSH channel, in list order. Each setenv call must
% answer success. Returns ok once the list is exhausted.
ssh_setenv(_Conn, _Channel, []) ->
    ok;
ssh_setenv(Conn, Channel, [{Var, Value} | Rest])
  when is_list(Var), is_list(Value) ->
    success = ssh_connection:setenv(Conn, Channel, Var, Value, infinity),
    ssh_setenv(Conn, Channel, Rest).
291
% Runs every {Module, Function, Args} entry on Node through rpc:call/4,
% in list order. The results of the calls are ignored; returns ok.
call_functions(Node, Functions) ->
    lists:foreach(fun({M, F, A}) -> rpc:call(Node, M, F, A) end,
                  Functions).
298
% Waits up to N seconds for Node to answer a ping.
% Each round sleeps one second and then pings; pong is returned as soon as
% the node answers, pang when the remaining time runs out.
% The stop condition is N =< 0 rather than an exact match on 0, so a
% negative or fractional timeout still ends the countdown instead of
% counting down forever.
wait_for_node_alive(_Node, N) when N =< 0 ->
    pang;
wait_for_node_alive(Node, N) ->
    timer:sleep(1000),
    case net_adm:ping(Node) of
        pong ->
            pong;
        pang ->
            wait_for_node_alive(Node, N - 1)
    end.
310
% Stops ENode using the default options, i.e. the record fetch_options/1
% builds from an empty option list.
do_stop(ENode) ->
    DefaultOptions = fetch_options([]),
    do_stop(ENode, DefaultOptions).
% Calls init:stop on the remote node ENode and waits up to stop_timeout
% seconds for it to disappear from nodes().
% When cover is in use, the node's cover data is flushed to the main cover
% node first, and cover is stopped for the node once it is gone.
% Returns {ok, ENode} or the error from wait_for_node_dead/2.
do_stop(ENode, Options) ->
    CoverMain =
        case test_server:is_cover() of
            true ->
                Main = cover:get_main_node(),
                rpc:call(Main, cover, flush, [ENode]),
                {cover, Main};
            false ->
                no_cover
        end,
    spawn(ENode, init, stop, []),
    case wait_for_node_dead(ENode, Options#options.stop_timeout) of
        {ok, ENode} = Stopped ->
            case CoverMain of
                {cover, MainCoverNode} ->
                    %% To avoid that cover is started again if a node
                    %% with the same name is started later.
                    rpc:call(MainCoverNode, cover, stop, [ENode]);
                no_cover ->
                    ok
            end,
            Stopped;
        Error ->
            Error
    end.
339
% Waits up to N seconds for Node to disappear from nodes().
% Each round sleeps one second and then checks; {ok, Node} is returned as
% soon as the node is no longer listed, {error, stop_timeout, Node} when
% the remaining time runs out.
% The stop condition is N =< 0 rather than an exact match on 0, so a
% negative or fractional timeout still ends the countdown instead of
% counting down forever while the node stays connected.
wait_for_node_dead(Node, N) when N =< 0 ->
    {error, stop_timeout, Node};
wait_for_node_dead(Node, N) ->
    timer:sleep(1000),
    case lists:member(Node, nodes()) of
        true ->
            wait_for_node_dead(Node, N - 1);
        false ->
            {ok, Node}
    end.
351