1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20-module(heart).
21
22-compile(no_native).
23% 'no_native' as part of a crude fix to make init:restart/0 work by clearing
24% all hipe inter-module information (hipe_mfa_info's in hipe_bif0.c).
25
26%%%--------------------------------------------------------------------
27%%% This is a rewrite of pre_heart from BS.3.
28%%%
29%%% The purpose of this process-module is to act as a supervisor
%%% of the entire erlang-system. This 'heart' beats with a frequency
%%% that keeps an external port program from rebooting the entire
%%% system. If however the erlang-emulator would hang, a reboot is
%%% then needed.
34%%%
35%%% It recognizes the flag '-heart'
36%%%--------------------------------------------------------------------
37-export([start/0, init/2,
38         set_cmd/1, clear_cmd/0, get_cmd/0,
39         set_callback/2, clear_callback/0, get_callback/0,
40         set_options/1, get_options/0,
41         cycle/0]).
42
43-define(START_ACK, 1).
44-define(HEART_BEAT, 2).
45-define(SHUT_DOWN, 3).
46-define(SET_CMD, 4).
47-define(CLEAR_CMD, 5).
48-define(GET_CMD, 6).
49-define(HEART_CMD, 7).
50-define(PREPARING_CRASH, 8). % Used in beam vm
51
52-define(TIMEOUT, 5000).
53-define(CYCLE_TIMEOUT, 10000).
54-define(HEART_PORT_NAME, heart_port).
55
56%% valid heart options
57-define(SCHEDULER_CHECK_OPT, check_schedulers).
58
59-type heart_option() :: ?SCHEDULER_CHECK_OPT.
60
61-record(state,{port :: port(),
62               cmd  :: [] | binary(),
63               options :: [heart_option()],
64               callback :: 'undefined' | {atom(), atom()}}).
65
66%%---------------------------------------------------------------------
67
%% Starts the heart process unless a process is already registered
%% under the name 'heart'.
%%
%% Returns:
%%   {ok, Pid}       - heart is running (already registered, or the new
%%                     process acknowledged with 'ok')
%%   ignore          - the new process answered 'no_heart'
%%   {error, Reason} - the new process answered with another tag, or it
%%                     terminated before sending any acknowledgement
-spec start() -> 'ignore' | {'error', term()} | {'ok', pid()}.

start() ->
    case whereis(heart) of
        undefined ->
            %% As heart survives a init:restart/0 the Parent
            %% of heart must be init.
            %% The init process is responsible to create a link
            %% to heart.
            %% We only monitor (never link to) the new process, so the
            %% starter is told if it dies before acknowledging instead
            %% of waiting forever.
            {Pid, MRef} = spawn_monitor(?MODULE, init,
                                        [self(), whereis(init)]),
            wait_for_init_ack(Pid, MRef);
        Pid ->
            {ok, Pid}
    end.

%% Waits for the acknowledgement sent by init/2, or for the monitor to
%% report that the process went down without acknowledging. The monitor
%% is removed (and any pending 'DOWN' flushed) once an answer arrived.
wait_for_init_ack(From, MRef) ->
    receive
        {ok, From} = Ok ->
            erlang:demonitor(MRef, [flush]),
            Ok;
        {no_heart, From} ->
            erlang:demonitor(MRef, [flush]),
            ignore;
        {Error, From} ->
            erlang:demonitor(MRef, [flush]),
            {error, Error};
        {'DOWN', MRef, process, From, Reason} ->
            {error, Reason}
    end.
92
%% Entry point of the spawned heart process.
%%
%% Traps exits, raises its own priority to max and registers itself
%% under the module name. It then tries to start the port program and
%% tells Starter the outcome:
%%   {ok, Self}          - port program started; enters loop/2
%%   {no_heart, Self}    - start_portprogram/0 threw 'no_heart'
%%   {start_error, Self} - start_portprogram/0 returned/threw 'error'
-spec init(pid(), pid()) -> {'no_heart', pid()} | {'start_error', pid()}.

init(Starter, Parent) ->
    process_flag(trap_exit, true),
    process_flag(priority, max),
    Self = self(),
    register(?MODULE, Self),
    StartResult = (catch start_portprogram()),
    case StartResult of
        {ok, Port} ->
            Starter ! {ok, Self},
            InitialState = #state{port=Port, cmd=[], options=[]},
            loop(Parent, InitialState);
        no_heart ->
            Starter ! {no_heart, Self};
        error ->
            Starter ! {start_error, Self}
    end.
108
%% Asks the registered heart process to store Cmd as the new heart
%% command, then blocks until heart answers.
-spec set_cmd(Cmd) -> 'ok' | {'error', {'bad_cmd', Cmd}} when
      Cmd :: string().

set_cmd(Cmd) ->
    Request = {self(), set_cmd, Cmd},
    ?MODULE ! Request,
    wait().
115
%% Asks the registered heart process for the current heart command and
%% blocks until heart answers.
-spec get_cmd() -> {ok, Cmd} when
      Cmd :: string().

get_cmd() ->
    Request = {self(), get_cmd},
    ?MODULE ! Request,
    wait().
122
%% Asks the registered heart process to clear the heart command and
%% blocks until heart answers.
-spec clear_cmd() -> ok.

clear_cmd() ->
    Request = {self(), clear_cmd},
    ?MODULE ! Request,
    wait().
128
%% Asks the registered heart process to install {Module, Function} as
%% the callback and blocks until heart answers.
-spec set_callback(Module,Function) -> 'ok' | {'error', {'bad_callback', {Module, Function}}} when
      Module :: atom(),
      Function :: atom().

set_callback(Module, Function) ->
    Callback = {Module, Function},
    ?MODULE ! {self(), set_callback, Callback},
    wait().
136
%% Asks the registered heart process for the installed callback and
%% blocks until heart answers; 'none' means no callback is set.
-spec get_callback() -> {'ok', {Module, Function}} | 'none' when
      Module :: atom(),
      Function :: atom().

get_callback() ->
    Request = {self(), get_callback},
    ?MODULE ! Request,
    wait().
144
%% Asks the registered heart process to remove the installed callback
%% and blocks until heart answers.
-spec clear_callback() -> ok.

clear_callback() ->
    Request = {self(), clear_callback},
    ?MODULE ! Request,
    wait().
150
%% Asks the registered heart process to replace its option list with
%% Options and blocks until heart answers.
-spec set_options(Options) -> 'ok' | {'error', {'bad_options', Options}} when
      Options :: [heart_option()].

set_options(Options) ->
    Request = {self(), set_options, Options},
    ?MODULE ! Request,
    wait().
157
%% Asks the registered heart process for its current option list and
%% blocks until heart answers; 'none' means the list is empty.
-spec get_options() -> {'ok', Options} | 'none' when
      Options :: [atom()].

get_options() ->
    Request = {self(), get_options},
    ?MODULE ! Request,
    wait().
164
%%% Should be used solely by the release handler!
%%%
%%% Asks the registered heart process to replace its port program with
%%% a freshly started one and blocks until heart answers.
-spec cycle() -> 'ok' | {'error', term()}.

cycle() ->
    Request = {self(), cycle},
    ?MODULE ! Request,
    wait().
171
%% Blocks until a {heart, Reply} message is in the mailbox and returns
%% Reply. There is no timeout.
wait() ->
    receive
        {?MODULE, Reply} -> Reply
    end.
177
%% Opens the external "heart" port program and waits for its start
%% acknowledgement.
%%
%% Returns {ok, Port} once the port has acknowledged and has been
%% registered as ?HEART_PORT_NAME, or 'error' (after reporting the
%% problem) when the port could not be opened or terminated before
%% acknowledging. check_start_heart/0 may throw 'no_heart' or 'error'
%% before any port is opened.
start_portprogram() ->
    check_start_heart(),
    OsPid = os:getpid(),
    Timeouts = get_heart_timeouts(),
    HeartCmd = "heart -pid " ++ OsPid ++ " " ++ Timeouts,
    try open_port({spawn, HeartCmd}, [{packet, 2}]) of
        Port when is_port(Port) ->
            register_acked_port(Port, wait_ack(Port))
    catch
        _:Reason ->
            report_problem({{open_port, Reason},
                            {heart, start_portprogram, []}}),
            error
    end.

%% Acts on the result of wait_ack/1 for a freshly opened port.
register_acked_port(Port, ok) ->
    %% register port so the vm can find it if need be
    register(?HEART_PORT_NAME, Port),
    {ok, Port};
register_acked_port(_Port, {error, Reason}) ->
    report_problem({{port_problem, Reason},
                    {heart, start_portprogram, []}}),
    error.
199
%% Builds the timeout argument for the heart command line from the
%% HEART_BEAT_TIMEOUT environment variable: "-ht Value" when it is set,
%% the empty string otherwise.
get_heart_timeouts() ->
    case os:getenv("HEART_BEAT_TIMEOUT") of
        Timeout when is_list(Timeout) ->
            "-ht " ++ Timeout;
        false ->
            ""
    end.
206
%% Checks the '-heart' emulator flag.
%%
%% Returns ok when the flag was given without arguments. Throws
%% 'no_heart' when the flag is absent, and throws 'error' (after
%% reporting the first argument) when the flag was given with arguments.
check_start_heart() ->
    case init:get_argument(heart) of
        error ->
            throw(no_heart);
        {ok, [[]]} ->
            ok;
        {ok, [[FirstArg|_]|_]} ->
            Problem = {{bad_heart_flag, list_to_atom(FirstArg)},
                       {heart, check_start_heart, []}},
            report_problem(Problem),
            throw(error)
    end.
218
%% Waits for an acknowledgement from Port.
%%
%% Returns ok when the port sends the ?START_ACK byte, or
%% {error, Reason} when the port terminates with a reason other than
%% 'badsig'. 'badsig' exits are consumed and skipped.
wait_ack(Port) ->
    receive
        {Port, {data, [?START_ACK]}} ->
            ok;
        {'EXIT', Port, Reason} when Reason =/= badsig ->
            %% The port really terminated.
            {error, Reason};
        {'EXIT', Port, badsig} ->
            %% Since this is not synchronous, skip it!
            wait_ack(Port)
    end.
228
%% Main receive loop of the heart process.
%%
%% Every pass first sends a heart beat (which also runs the configured
%% system checks and callback), then handles one message, or nothing if
%% none arrives within ?TIMEOUT ms, and re-enters the loop. The loop
%% ends when Parent or the port exits.
loop(Parent, #state{port=Port}=S) ->
    _ = send_heart_beat(S),
    receive
        {From, set_cmd, NewCmd0} ->
            loop(Parent, handle_set_cmd(From, NewCmd0, S));
        {From, clear_cmd} ->
            From ! {?MODULE, ok},
            _ = send_heart_cmd(Port, []),
            _ = wait_ack(Port),
            loop(Parent, S#state{cmd = []});
        {From, get_cmd} ->
            From ! {?MODULE, get_heart_cmd(Port)},
            loop(Parent, S);
        {From, set_callback, Callback} ->
            loop(Parent, handle_set_callback(From, Callback, S));
        {From, get_callback} ->
            From ! {?MODULE, value_or_none(S#state.callback, undefined)},
            loop(Parent, S);
        {From, clear_callback} ->
            From ! {?MODULE, ok},
            loop(Parent, S#state{callback=undefined});
        {From, set_options, Options} ->
            loop(Parent, handle_set_options(From, Options, S));
        {From, get_options} ->
            From ! {?MODULE, value_or_none(S#state.options, [])},
            loop(Parent, S);
        {From, cycle} ->
            %% Calls back to loop
            do_cycle_port_program(From, Parent, S);
        {'EXIT', Parent, shutdown} ->
            no_reboot_shutdown(Port);
        {'EXIT', Parent, Reason} ->
            exit(Port, Reason),
            exit(Reason);
        {'EXIT', Port, badsig} ->
            %% we can ignore badsig-messages!
            loop(Parent, S);
        {'EXIT', Port, _Reason} ->
            exit({port_terminated, {?MODULE, loop, [Parent, S]}});
        _ ->
            loop(Parent, S)
    after
        ?TIMEOUT ->
            loop(Parent, S)
    end.

%% Converts the requested command to a binary in the native file name
%% encoding. A binary shorter than 2047 bytes is sent to the port and
%% stored in the state; anything else is rejected with bad_cmd.
%% Returns the (possibly updated) state.
handle_set_cmd(From, NewCmd0, #state{port=Port}=S) ->
    Enc = file:native_name_encoding(),
    case catch unicode:characters_to_binary(NewCmd0,Enc,Enc) of
        NewCmd when is_binary(NewCmd), byte_size(NewCmd) < 2047 ->
            _ = send_heart_cmd(Port, NewCmd),
            _ = wait_ack(Port),
            From ! {?MODULE, ok},
            S#state{cmd=NewCmd};
        _ ->
            From ! {?MODULE, {error, {bad_cmd, NewCmd0}}},
            S
    end.

%% Stores Callback when it is a {Module, Function} pair of atoms,
%% otherwise answers with bad_callback. Returns the resulting state.
handle_set_callback(From, Callback, S) ->
    case Callback of
        {M,F} when is_atom(M), is_atom(F) ->
            From ! {?MODULE, ok},
            S#state{callback=Callback};
        _ ->
            From ! {?MODULE, {error, {bad_callback, Callback}}},
            S
    end.

%% Stores Options when validate_options/1 accepts them, otherwise
%% answers with bad_options. Returns the resulting state.
handle_set_options(From, Options, S) ->
    case validate_options(Options) of
        Validated when is_list(Validated) ->
            From ! {?MODULE, ok},
            S#state{options=Validated};
        _ ->
            From ! {?MODULE, {error, {bad_options, Options}}},
            S
    end.

%% Maps the "nothing set" marker Empty to 'none' and any other value
%% to {ok, Value}.
value_or_none(Empty, Empty) -> none;
value_or_none(Value, _Empty) -> {ok, Value}.
305
%% Sends the shutdown command to the port program, waits until the port
%% exits with a reason other than 'badsig', and then terminates the
%% heart process with reason 'normal'.
-spec no_reboot_shutdown(port()) -> no_return().

no_reboot_shutdown(Port) ->
    _ = send_shutdown(Port),
    receive
        {'EXIT', Port, Why} when Why =/= badsig ->
            ok
    end,
    exit(normal).
314
%% Checks that Opts is a proper list containing only known heart
%% options. Returns the accepted options (accumulated in reverse order)
%% or the atom 'error' for any other input.
validate_options(Opts) -> validate_options(Opts,[]).

validate_options(Remaining, Accepted) ->
    case Remaining of
        [] ->
            Accepted;
        [?SCHEDULER_CHECK_OPT=Opt|Rest] ->
            validate_options(Rest, [Opt|Accepted]);
        _ ->
            error
    end.
319
%% Replaces the running heart port program with a freshly started one
%% and then continues in loop/2.
%%
%% The port name is unregistered first because start_portprogram/0
%% registers the new port under the same name. On success the old port
%% is told to shut down, the stored heart command is sent to the new
%% port, and Caller gets 'ok' (or {error, stop_error} when the old port
%% did not exit within ?CYCLE_TIMEOUT ms). When the new port program
%% cannot be started, the old port is registered again so that it stays
%% reachable by name and a later cycle request can unregister it, and
%% Caller gets {error, no_heart} or {error, start_error}.
do_cycle_port_program(Caller, Parent, #state{port=Port} = S) ->
    unregister(?HEART_PORT_NAME),
    case catch start_portprogram() of
        {ok, NewPort} ->
            _ = send_shutdown(Port),
            receive
                {'EXIT', Port, _Reason} ->
                    _ = send_heart_cmd(NewPort, S#state.cmd),
                    Caller ! {?MODULE, ok},
                    loop(Parent, S#state{port=NewPort})
            after
                ?CYCLE_TIMEOUT ->
                    %% Huh! Two heart port programs running...
                    %% well, the old one has to be sick not to respond
                    %% so we'll settle for the new one...
                    _ = send_heart_cmd(NewPort, S#state.cmd),
                    Caller ! {?MODULE, {error, stop_error}},
                    loop(Parent, S#state{port=NewPort})
            end;
        no_heart ->
            restore_heart_port_name(Port),
            Caller ! {?MODULE, {error, no_heart}},
            loop(Parent, S);
        error ->
            restore_heart_port_name(Port),
            Caller ! {?MODULE, {error, start_error}},
            loop(Parent, S)
    end.

%% Puts the old port back under ?HEART_PORT_NAME after a failed cycle.
%% register/2 raises badarg if it cannot register the port (for example
%% if the port is no longer alive); that is tolerated here because
%% loop/2 handles the port's 'EXIT' message itself.
restore_heart_port_name(Port) ->
    try register(?HEART_PORT_NAME, Port) of
        true -> ok
    catch
        error:badarg -> ok
    end.
346
347
%% Beats the heart once: runs the system checks selected by the options
%% and the callback (both must return ok), then sends the heart beat
%% command to the port.
send_heart_beat(#state{} = S) ->
    ok = check_system(S#state.options),
    ok = check_callback(S#state.callback),
    Beat = {self(), {command, [?HEART_BEAT]}},
    S#state.port ! Beat.
353
%% Sends a heart command update to the port: an empty list clears the
%% command, anything else is sent as the new command. The payload for a
%% new command is the command byte consed onto Cmd, which may produce
%% an improper list, hence the dialyzer attribute.
-dialyzer({no_improper_lists, send_heart_cmd/2}).
send_heart_cmd(Port, Cmd) ->
    Payload = case Cmd of
                  [] -> [?CLEAR_CMD];
                  _ -> [?SET_CMD|Cmd]
              end,
    Port ! {self(), {command, Payload}}.
360
%% Asks the port for the current heart command and blocks until the
%% port answers with a ?HEART_CMD packet. Returns {ok, Cmd} where Cmd
%% is the data following the tag byte.
get_heart_cmd(Port) ->
    Query = {self(), {command, [?GET_CMD]}},
    Port ! Query,
    receive
        {Port, {data, [?HEART_CMD | Current]}} ->
            {ok, Current}
    end.
367
%% Runs the system check belonging to each option in the list. Every
%% check must return ok; returns ok when the list is exhausted.
check_system([?SCHEDULER_CHECK_OPT|Remaining]) ->
    ok = erts_internal:system_check(schedulers),
    check_system(Remaining);
check_system([]) ->
    ok.
372
%% Validate the system by performing a check before the heartbeat.
%% Returns 'ok' when no callback is set; otherwise returns whatever the
%% callback Module:Function() returns.
%% It is fine to time out in the callback, in fact that is the intention
%% if something goes wrong -> no heartbeat.

check_callback(Callback) ->
    case Callback of
        {Mod, Fun} ->
            erlang:apply(Mod, Fun, []);
        undefined ->
            ok
    end.
385
%% Sends the shutdown command to the port and returns the sent message.
send_shutdown(Port) ->
    Shutdown = {self(), {command, [?SHUT_DOWN]}},
    Port ! Shutdown.
388
%% Reports a problem term. erlang:display/1 is used because we do not
%% know whether an error_logger is available at this point.
report_problem(Problem) ->
    erlang:display(Problem).
393