%% Copyright (c) 2011 Basho Technologies, Inc.  All Rights Reserved.
%% Copyright (c) 2018-2021 VMware, Inc. or its affiliates.  All rights reserved.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   https://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.

%% @doc A custom event handler to the `sysmon_handler' application's
%% `system_monitor' event manager.
%%
%% This module attempts to discover more information about a process
%% that generates a system_monitor event.

-module(rabbit_sysmon_handler).

-behaviour(gen_event).

%% API
-export([add_handler/0]).

%% gen_event callbacks
-export([init/1, handle_event/2, handle_call/2,
         handle_info/2, terminate/2, code_change/3]).

-record(state, {timer_ref :: reference() | undefined}).

-define(INACTIVITY_TIMEOUT, 5000).

%%%===================================================================
%%% API
%%%===================================================================

%% @doc Install this module as a custom handler of the `sysmon_handler'
%% event manager, unless it is already listed among the manager's
%% handlers.
%%
%% Returns `ok' when the handler is already present; otherwise returns
%% whatever `sysmon_handler_filter:add_custom_handler/2' returns.
add_handler() ->
    %% The lookup and the installation are two separate steps, so
    %% concurrent callers could install the handler more than once.
    %% That risk is zero in the common OTP app startup case.
    Installed = gen_event:which_handlers(sysmon_handler),
    install_unless_present(lists:member(?MODULE, Installed)).

%% Installs the handler only when the membership test came back `false'.
install_unless_present(true) ->
    ok;
install_unless_present(false) ->
    sysmon_handler_filter:add_custom_handler(?MODULE, []).

%%%===================================================================
%%% gen_event callbacks
%%%===================================================================

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Called when this handler is added to an event manager. Starts with
%% an empty state (no inactivity timer yet) and returns `hibernate'.
%%
%% @spec init(Args) -> {ok, State, hibernate}
%% @end
%%--------------------------------------------------------------------
init([]) ->
    InitialState = #state{},
    {ok, InitialState, hibernate}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Called for every event delivered to the event manager with
%% gen_event:notify/2 or gen_event:sync_notify/2.
%%
%% Every event, recognised or not, restarts the inactivity timer; the
%% event itself is then acted upon (see react_to_event/1).
%%
%% @spec handle_event(Event, State) -> {ok, State}
%% @end
%%--------------------------------------------------------------------
handle_event(Event, #state{timer_ref = OldTimerRef} = State) ->
    %% Reset the inactivity timeout before doing anything else.
    FreshTimerRef = reset_timer(OldTimerRef),
    react_to_event(Event),
    {ok, State#state{timer_ref = FreshTimerRef}}.

%% Performs the event-specific side effect:
%%  * a monitor event about this very process is not logged; it only
%%    triggers maybe_collect_garbage/1 for the reported type;
%%  * a monitor event about any other pid or port is logged as a
%%    warning together with whatever details can be gathered;
%%  * a suppressed event is logged at debug level;
%%  * anything else is logged as an unhandled event.
react_to_event({monitor, Pid, Type, _Info}) when Pid == self() ->
    maybe_collect_garbage(Type);
react_to_event({monitor, PidOrPort, Type, Info}) ->
    {Fmt, Args} = format_pretty_proc_or_port_info(PidOrPort),
    rabbit_log:warning("~p ~w ~w " ++ Fmt ++ " ~w",
                       [?MODULE, Type, PidOrPort] ++ Args ++ [Info]);
react_to_event({suppressed, Type, Info}) ->
    rabbit_log:debug("~p encountered a suppressed event of type ~w: ~w",
                     [?MODULE, Type, Info]);
react_to_event(Event) ->
    rabbit_log:warning("~p unhandled event: ~p", [?MODULE, Event]).

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Called when a request is sent to this handler with
%% gen_event:call/3,4. No requests are supported: every request is
%% answered with `not_supported' and the state is left untouched.
%%
%% @spec handle_call(Request, State) -> {ok, not_supported, State}
%% @end
%%--------------------------------------------------------------------
handle_call(_Request, State) ->
    {ok, not_supported, State}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Called when the event manager receives any message other than an
%% event or a synchronous request (or a system message).
%%
%% @spec handle_info(Info, State) ->
%%                          {ok, State} |
%%                          {ok, State, hibernate}
%% @end
%%--------------------------------------------------------------------
handle_info(inactivity_timeout, State) ->
    %% The inactivity timer fired, i.e. no events have arrived for the
    %% timeout period: hibernate to free up resources.
    {ok, State, hibernate};
handle_info(Unexpected, State) ->
    rabbit_log:info("handle_info got ~p", [Unexpected]),
    {ok, State}.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Whenever an event handler is deleted from an event manager, this
%% function is called. It should be the opposite of Module:init/1 and
%% do any necessary cleaning up.
%%
%% @spec terminate(Reason, State) -> void()
%% @end
%%--------------------------------------------------------------------
terminate(_Reason, _State) ->
    ok.

%%--------------------------------------------------------------------
%% @private
%% @doc
%% Convert process state when code is changed. The state is returned
%% unchanged.
%%
%% @spec code_change(OldVsn, State, Extra) -> {ok, NewState}
%% @end
%%--------------------------------------------------------------------
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% @doc Build a `{Format, Args}' pair describing a pid or port, suitable
%% for splicing into a larger io format string and argument list.
%%
%% Returns `{"", []}' when nothing is known about the subject (dead
%% process or closed port). If gathering the details raises, the
%% exception class, reason (both printed with depth 20) and stacktrace
%% are returned as the description instead of propagating.
format_pretty_proc_or_port_info(PidOrPort) ->
    try get_pretty_proc_or_port_info(PidOrPort) of
        undefined ->
            {"", []};
        Res ->
            Res
    catch C:E:S ->
        {"Pid ~w, ~W ~W at ~w\n",
         [PidOrPort, C, 20, E, 20, S]}
    end.

%% @doc Gather details about a process or a port.
%%
%% For a process: its registered name (if any), initial call (translated
%% through proc_lib when the process was started by proc_lib), current
%% function and message queue length, returned as `{"~w", [Props]}'.
%%
%% For a port: its name plus the remaining port_info/1 properties, the
%% queue size and, when available, the translated initial call of the
%% connected process, returned as `{"name ~s ~w", [Name, Props]}'.
%%
%% Returns `undefined' when the process is dead or the port is closed.
get_pretty_proc_or_port_info(Pid) when is_pid(Pid) ->
    Infos = [registered_name, initial_call, current_function, message_queue_len],
    case process_info(Pid, Infos) of
        undefined ->
            undefined;
        [] ->
            undefined;
        [{registered_name, RN0}, ICT1, {_, CF}, {_, MQL}] ->
            ICT = case proc_lib:translate_initial_call(Pid) of
                      {proc_lib, init_p, 5} ->   % not by proc_lib, see docs
                          ICT1;
                      ICT2 ->
                          {initial_call, ICT2}
                  end,
            {"~w", [registered_name_prop(RN0) ++
                        [ICT, CF, {message_queue_len, MQL}]]}
    end;
get_pretty_proc_or_port_info(Port) when is_port(Port) ->
    case erlang:port_info(Port) of
        undefined ->
            %% The port is already closed. Report "nothing known", as
            %% is done for a dead process, rather than crashing on the
            %% key lookup below.
            undefined;
        PortInfo ->
            {value, {name, Name}, PortInfo2} = lists:keytake(name, 1, PortInfo),
            %% The port may close between the two port_info calls; do
            %% not let a stray `undefined' leak into the property list.
            QueueSize = case erlang:port_info(Port, queue_size) of
                            undefined -> [];
                            QS -> [QS]
                        end,
            Connected = connected_initial_call(
                          proplists:get_value(connected, PortInfo2)),
            {"name ~s ~w",
             [Name, lists:append([PortInfo2, QueueSize, Connected])]}
    end.

%% An unregistered process reports `[]' as its registered name when
%% queried through process_info/2 with an item list.
registered_name_prop([]) -> [];
registered_name_prop(Name) -> [{name, Name}].

%% Translated initial call of the process a port is connected to, as a
%% (possibly empty) property list.
connected_initial_call(undefined) ->
    [];
connected_initial_call(ConnectedPid) ->
    case proc_lib:translate_initial_call(ConnectedPid) of
        {proc_lib, init_p, 5} ->   % not by proc_lib, see docs
            [];
        ICT ->
            [{initial_call, ICT}]
    end.


%% @doc If the message type is due to a large heap warning
%% and the source is ourself, go ahead and collect garbage
%% to avoid the death spiral.
-spec maybe_collect_garbage(atom()) -> ok.
maybe_collect_garbage(large_heap) ->
    erlang:garbage_collect(),
    ok;
maybe_collect_garbage(_) ->
    ok.

%% @doc (Re)start the inactivity timer. Any timer identified by the
%% given reference is cancelled first; then a new timer is started that
%% sends `inactivity_timeout' to the calling process after
%% ?INACTIVITY_TIMEOUT milliseconds. Returns the new timer reference.
-spec reset_timer(undefined | reference()) -> reference().
reset_timer(undefined) ->
    erlang:send_after(?INACTIVITY_TIMEOUT, self(), inactivity_timeout);
reset_timer(TimerRef) ->
    _ = erlang:cancel_timer(TimerRef),
    reset_timer(undefined).