1%% Copyright (c) 2011 Basho Technologies, Inc.  All Rights Reserved.
2%% Copyright (c) 2018-2021 VMware, Inc. or its affiliates.  All rights reserved.
3%%
4%% This file is provided to you under the Apache License,
5%% Version 2.0 (the "License"); you may not use this file
6%% except in compliance with the License.  You may obtain
7%% a copy of the License at
8%%
9%%   https://www.apache.org/licenses/LICENSE-2.0
10%%
11%% Unless required by applicable law or agreed to in writing,
12%% software distributed under the License is distributed on an
13%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14%% KIND, either express or implied.  See the License for the
15%% specific language governing permissions and limitations
16%% under the License.
17
18%% @doc A custom event handler to the `sysmon_handler' application's
19%% `system_monitor' event manager.
20%%
21%% This module attempts to discover more information about a process
22%% that generates a system_monitor event.
23
24-module(rabbit_sysmon_handler).
25
26-behaviour(gen_event).
27
28%% API
29-export([add_handler/0]).
30
31%% gen_event callbacks
32-export([init/1, handle_event/2, handle_call/2,
33         handle_info/2, terminate/2, code_change/3]).
34
35-record(state, {timer_ref :: reference() | undefined}).
36
37-define(INACTIVITY_TIMEOUT, 5000).
38
%%%===================================================================
%%% API
%%%===================================================================
42
%% @doc Installs this module as a custom handler on the `sysmon_handler'
%% event manager, unless it is already among the installed handlers.
%%
%% Returns `ok' when the handler is already present; otherwise returns
%% whatever `sysmon_handler_filter:add_custom_handler/2' returns.
%%
%% The check and the installation are two separate steps, so concurrent
%% callers could install the handler more than once. During a regular OTP
%% application start-up there is a single caller, so the risk is nil there.
add_handler() ->
    Installed = gen_event:which_handlers(sysmon_handler),
    case lists:member(?MODULE, Installed) of
        false ->
            sysmon_handler_filter:add_custom_handler(?MODULE, []);
        true ->
            ok
    end.
52
53%%%===================================================================
54%%% gen_event callbacks
55%%%===================================================================
56
%%--------------------------------------------------------------------
%% @private
%% @doc
%% gen_event callback invoked when this handler is installed in an
%% event manager. Only an empty argument list is accepted.
%%
%% The handler starts with a default `#state{}' record (no inactivity
%% timer reference yet) and returns `{ok, State, hibernate}', i.e. it
%% asks to be hibernated right away.
%% @end
%%--------------------------------------------------------------------
init([]) ->
    InitialState = #state{},
    {ok, InitialState, hibernate}.
68
%%--------------------------------------------------------------------
%% @private
%% @doc
%% gen_event callback invoked for every event delivered to this handler.
%%
%% Every event first re-arms the inactivity timer and is then handled
%% according to its shape:
%%
%% `{monitor, Pid, Type, Info}' with `Pid' being the calling process:
%% nothing is logged; a `large_heap' event triggers a garbage collection.
%%
%% `{monitor, PidOrPort, Type, Info}': a warning is logged together with
%% whatever details can be gathered about the pid or port.
%%
%% `{suppressed, Type, Info}': a debug message is logged.
%%
%% Anything else: a warning about the unhandled event is logged.
%%
%% Always returns `{ok, State}' with the new timer reference stored.
%% @end
%%--------------------------------------------------------------------
handle_event(Event, State = #state{timer_ref = OldRef}) ->
    %% Re-arm the inactivity timer before acting on the event.
    FreshRef = reset_timer(OldRef),
    _ = react_to_event(Event),
    {ok, State#state{timer_ref = FreshRef}}.

%% Performs the event-specific side effect (garbage collection or logging)
%% for a single event; the result is ignored by the caller.
react_to_event({monitor, Pid, Type, _Info}) when Pid =:= self() ->
    maybe_collect_garbage(Type);
react_to_event({monitor, PidOrPort, Type, Info}) ->
    {Fmt, Args} = format_pretty_proc_or_port_info(PidOrPort),
    rabbit_log:warning("~p ~w ~w " ++ Fmt ++ " ~w",
                       [?MODULE, Type, PidOrPort] ++ Args ++ [Info]);
react_to_event({suppressed, Type, Info}) ->
    rabbit_log:debug("~p encountered a suppressed event of type ~w: ~w",
                     [?MODULE, Type, Info]);
react_to_event(Other) ->
    rabbit_log:warning("~p unhandled event: ~p", [?MODULE, Other]).
103
%%--------------------------------------------------------------------
%% @private
%% @doc
%% gen_event callback invoked for synchronous requests addressed to
%% this handler. No request is supported: every call is answered with
%% `not_supported' and the state is returned unchanged, as
%% `{ok, not_supported, State}'.
%% @end
%%--------------------------------------------------------------------
handle_call(_Request, State) ->
    {ok, not_supported, State}.
120
%%--------------------------------------------------------------------
%% @private
%% @doc
%% gen_event callback invoked for any message that is neither an event
%% nor a synchronous request.
%%
%% `inactivity_timeout' means no event arrived during the inactivity
%% period, so `{ok, State, hibernate}' is returned to free up resources.
%% Any other message is logged at info level and otherwise ignored,
%% returning `{ok, State}'.
%% @end
%%--------------------------------------------------------------------
handle_info(inactivity_timeout, State) ->
    {ok, State, hibernate};
handle_info(Unexpected, State) ->
    rabbit_log:info("handle_info got ~p", [Unexpected]),
    {ok, State}.
141
%%--------------------------------------------------------------------
%% @private
%% @doc
%% gen_event callback invoked when this handler is removed from an
%% event manager. Both arguments are ignored and nothing is cleaned up;
%% the function simply returns `ok'.
%% @end
%%--------------------------------------------------------------------
terminate(_Why, _FinalState) ->
    ok.
154
%%--------------------------------------------------------------------
%% @private
%% @doc
%% gen_event callback invoked on a code upgrade or downgrade. The state
%% needs no conversion, so it is handed back untouched as `{ok, State}'.
%% @end
%%--------------------------------------------------------------------
code_change(_FromVsn, State, _Extra) ->
    {ok, State}.
165
166%%%===================================================================
167%%% Internal functions
168%%%===================================================================
169
%% @doc Builds a `{Format, Args}' pair describing a pid or port, ready to
%% be spliced into a log call.
%%
%% When `get_pretty_proc_or_port_info/1' has nothing to report
%% (`undefined'), an empty format with no arguments is returned. If the
%% lookup raises, the exception is not propagated: a format describing the
%% class, the reason and the stack trace is returned instead, with class
%% and reason printed depth-limited (`~W' with depth 20).
format_pretty_proc_or_port_info(PidOrPort) ->
    try get_pretty_proc_or_port_info(PidOrPort) of
        undefined ->
            {"", []};
        Described ->
            Described
    catch
        Class:Reason:Stacktrace ->
            {"Pid ~w, ~W ~W at ~w\n",
             [PidOrPort, Class, 20, Reason, 20, Stacktrace]}
    end.
182
%% @doc Gathers a short description of a process or port for logging.
%%
%% Returns `{Format, Args}', or `undefined' when the process or port no
%% longer exists.
%%
%% For a pid the description holds the registered name (if any), the
%% initial call (translated through `proc_lib' when the process was
%% started by it), the current function and the message queue length.
%%
%% For a port the description holds the port name, the remaining
%% `erlang:port_info/1' items, the queue size (when still available) and
%% the `proc_lib'-translated initial call of the connected process (when
%% it was started through `proc_lib').
%%
%% Any other kind of argument raises `function_clause'.
get_pretty_proc_or_port_info(Pid) when is_pid(Pid) ->
    Infos = [registered_name, initial_call, current_function, message_queue_len],
    case process_info(Pid, Infos) of
        undefined ->
            undefined;
        [] ->
            undefined;
        [{registered_name, RN0}, ICT1, {_, CF}, {_, MQL}] ->
            ICT = case proc_lib:translate_initial_call(Pid) of
                      {proc_lib, init_p, 5} ->   % not by proc_lib, see docs
                          ICT1;
                      ICT2 ->
                          {initial_call, ICT2}
                  end,
            %% An unregistered process reports [] as its registered name.
            RNL = case RN0 of
                      [] -> [];
                      _  -> [{name, RN0}]
                  end,
            {"~w", [RNL ++ [ICT, CF, {message_queue_len, MQL}]]}
    end;
get_pretty_proc_or_port_info(Port) when is_port(Port) ->
    case erlang:port_info(Port) of
        undefined ->
            %% The port is already closed: report "no info", exactly as
            %% the pid clause does for a dead process, instead of crashing
            %% on the keytake below.
            undefined;
        PortInfo ->
            {value, {name, Name}, PortInfo2} = lists:keytake(name, 1, PortInfo),
            Extras = lists:append([PortInfo2,
                                   port_queue_size(Port),
                                   connected_initial_call(PortInfo2)]),
            {"name ~s ~w", [Name, Extras]}
    end.

%% Returns `[{queue_size, N}]' for an open port, or `[]' when the port was
%% closed between the two `port_info' calls.
port_queue_size(Port) ->
    case erlang:port_info(Port, queue_size) of
        undefined -> [];
        QueueSize -> [QueueSize]
    end.

%% Returns `[{initial_call, MFA}]' for the process connected to the port
%% when it was started through proc_lib, otherwise `[]'.
connected_initial_call(PortInfo) ->
    case proplists:get_value(connected, PortInfo) of
        undefined ->
            [];
        ConnectedPid ->
            case proc_lib:translate_initial_call(ConnectedPid) of
                {proc_lib, init_p, 5} ->   % not by proc_lib, see docs
                    [];
                ICT ->
                    [{initial_call, ICT}]
            end
    end.
218
219
%% @doc Runs a garbage collection of the calling process when the event
%% type is `large_heap'; does nothing for any other type. Used when the
%% large heap warning is about ourselves, to avoid a death spiral.
%% Always returns `ok'.
-spec maybe_collect_garbage(atom()) -> ok.
maybe_collect_garbage(Type) ->
    _ = (Type =:= large_heap andalso erlang:garbage_collect()),
    ok.
229
%% @doc (Re)arms the inactivity timer.
%%
%% Cancels the timer identified by the given reference, if there is one,
%% and then schedules an `inactivity_timeout' message to the calling
%% process after `?INACTIVITY_TIMEOUT' milliseconds. Returns the reference
%% of the newly started timer.
-spec reset_timer(undefined | reference()) -> reference().
reset_timer(Previous) ->
    case Previous of
        undefined ->
            ok;
        _ ->
            %% The result is irrelevant: the timer may have fired already.
            _ = erlang:cancel_timer(Previous),
            ok
    end,
    erlang:send_after(?INACTIVITY_TIMEOUT, self(), inactivity_timeout).
236