%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2019-2021. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%

-module(snmp_test_global_sys_monitor).

-export([start/0, stop/0,
         reset_events/0,
         events/0,
         log/1]).
-export([init/1]).

-include("snmp_test_lib.hrl").

-define(NAME,    ?MODULE).
-define(TIMEOUT, timer:seconds(6)).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Start the monitor process via proc_lib. The value returned is whatever
%% init/1 acknowledges: {ok, Pid} or {error, already_started}.
start() ->
    proc_lib:start(?MODULE, init, [self()]).

%% Asynchronously ask the globally registered monitor to terminate.
stop() ->
    cast(stop).

%% Clears the list of collected events only; the running total event
%% counter kept by the monitor is left untouched.
reset_events() ->
    call(reset_events, ?TIMEOUT).

%% Request the events collected since the last reset.
events() ->
    call(events, ?TIMEOUT).

%% Report an event to the monitor, tagged with the reporting node.
log(Event) ->
    cast({node(), Event}).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% proc_lib entry point. Raises the process priority, tries to claim the
%% global name and acknowledges the outcome to Parent. On success the
%% process enters the server loop; if the name is already taken it
%% reports {error, already_started} and exits normally.
init(Parent) ->
    process_flag(priority, high),
    Self = self(),
    case global:register_name(?NAME, Self) of
        no ->
            warning_msg("Already started", []),
            proc_lib:init_ack(Parent, {error, already_started}),
            exit(normal);
        yes ->
            info_msg("Starting as ~p (on ~p)", [Self, node()]),
            proc_lib:init_ack(Parent, {ok, Self}),
            InitialState = #{parent => Parent, ev_cnt => 0, evs => []},
            loop(InitialState)
    end.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Server loop: take the next message (whatever it is), let handle_msg/2
%% compute the new state and continue. The state is a map with the keys
%% parent, ev_cnt (total number of counted events) and evs (events
%% collected since the last reset, newest first).
loop(State) ->
    receive
        Msg ->
            loop(handle_msg(Msg, State))
    end.

%% Handle one message and return the next state. The stop request never
%% returns; it terminates the process with reason normal.
handle_msg({?MODULE, stop}, State) ->
    warning_msg("Stopping with ~w events counted",
                [maps:get(ev_cnt, State)]),
    exit(normal);
handle_msg({?MODULE, Ref, From, reset_events}, State) ->
    Total   = maps:get(ev_cnt, State),
    Current = length(maps:get(evs, State)),
    info_msg("Reset events when"
             "~n Total Number of Events: ~p"
             "~n Current Number of Events: ~p",
             [Total, Current]),
    From ! {?MODULE, Ref, {ok, {Total, Current}}},
    State#{evs => []};
handle_msg({?MODULE, Ref, From, events}, State) ->
    %% Events are accumulated newest-first; reply in arrival order.
    Collected = lists:reverse(maps:get(evs, State)),
    From ! {?MODULE, Ref, Collected},
    State;
handle_msg({?MODULE, {Node, Event}}, State) ->
    process_event(State, Node, Event);
handle_msg({nodedown = Event, Node}, State) ->
    process_event(State, Node, Event);
handle_msg(_Unexpected, State) ->
    %% Anything else is silently dropped.
    State.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Dispatch an event reported by (or about) Node and return the new state.
%% Only system monitor events (the 4-tuples) can change the state.
process_event(State, Node, {Pid, TS, Tag, Info}) ->
    process_system_event(State, Node, Pid, TS, Tag, Info);

process_event(State, Node, {TS, starting}) ->
    info_msg("System Monitor starting on node ~p at ~s",
             [Node, snmp_misc:format_timestamp(TS)]),
    monitor_remote_node(Node, true),
    State;

process_event(State, Node, {TS, stopping}) ->
    info_msg("System Monitor stopping on node ~p at ~s",
             [Node, ?FTS(TS)]),
    monitor_remote_node(Node, false),
    State;

process_event(State, Node, {TS, already_started}) ->
    info_msg("System Monitor already started on node ~p at ~s",
             [Node, snmp_misc:format_timestamp(TS)]),
    State;

process_event(State, Node, nodedown) ->
    info_msg("Node ~p down", [Node]),
    State;

process_event(State, Node, Event) ->
    warning_msg("Received unknown event from node ~p:"
                "~n ~p", [Node, Event]),
    State.

%% Turn node monitoring on or off, but only for nodes other than our own.
monitor_remote_node(Node, _Flag) when Node =:= node() ->
    ok;
monitor_remote_node(Node, Flag) ->
    erlang:monitor_node(Node, Flag).


%% System Monitor events
%% Known events are printed, counted and collected as {Node, Tag};
%% the number printed in the heading is the amount of events collected
%% *before* this one.
process_system_event(#{ev_cnt := Cnt, evs := Evs} = State,
                     Node, Pid, TS, Tag, Info)
  when Tag =:= long_gc;
       Tag =:= long_schedule;
       Tag =:= large_heap;
       Tag =:= busy_port;
       Tag =:= busy_dist_port ->
    Pre = f(system_event_format(Tag), [length(Evs)]),
    print_system_event(Pre, Node, Pid, TS, Info),
    State#{ev_cnt => Cnt + 1, evs => [{Node, Tag} | Evs]};

%% And everything else: printed, but neither counted nor collected.
process_system_event(State, Node, Pid, TS, Tag, Info) ->
    print_system_event(f("Unknown Event '~p'", [Tag]), Node, Pid, TS, Info),
    State.

%% Heading format string for each known system monitor event tag.
system_event_format(long_gc)        -> "Long GC (~w)";
system_event_format(long_schedule)  -> "Long Schedule (~w)";
system_event_format(large_heap)     -> "Large Heap (~w)";
system_event_format(busy_port)      -> "Busy port (~w)";
system_event_format(busy_dist_port) -> "Busy dist port (~w)".


%% Log one system event as a warning, with its origin and timestamp.
print_system_event(Pre, Node, Pid, TS, Info) ->
    Args = [Pre, Node, Pid, snmp_misc:format_timestamp(TS), Info],
    warning_msg("~s from ~p (~p) at ~s:"
                "~n ~p", Args).

%% Format to a flat string.
f(F, A) ->
    Formatted = io_lib:format(F, A),
    lists:flatten(Formatted).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Fire-and-forget message to the globally registered monitor.
%% Returns ok when the message was sent, or {error, {catched, Class, Reason}}
%% when global:send/2 raised (for instance because the name is not
%% registered).
cast(Msg) ->
    try global:send(?NAME, {?MODULE, Msg}) of
        Pid when is_pid(Pid) ->
            ok
    catch
        Class:Reason ->
            {error, {catched, Class, Reason}}
    end.

%% Synchronous request with an overall Timeout (milliseconds or infinity).
%% The request itself is given a somewhat shorter timeout than the
%% surrounding supervision, so that under normal circumstances the inner
%% timeout fires first.
call(Req, Timeout) when Timeout =:= infinity; is_integer(Timeout) ->
    call(Req, Timeout, inner_timeout(Timeout)).

%% Timeout for the actual request, derived from the overall timeout.
%% The infinity clause must come first: as an atom it compares greater
%% than any integer.
inner_timeout(infinity) ->
    infinity;
inner_timeout(Timeout) when Timeout > 2000 ->
    Timeout - 1000;
inner_timeout(Timeout) when Timeout > 1000 ->
    Timeout - 500;
inner_timeout(Timeout) ->
    Timeout div 2.

%% This piece of weirdness is because on some machines this call has
%% hung (in a call during end_per_testcase, which had a 1 min timeout,
%% or if that was the total time for the test case).
%% But because it hung there, we don't really know where it got stuck.
%% So, by making the call in a tmp process, that we supervise, we can
%% keep control. Also, the API functions pass an actual time (?TIMEOUT)
%% instead of infinity.
%% Perform the request Req in a temporary, monitored process.
%%
%% Timeout1 bounds how long the caller waits for the temporary process;
%% Timeout2 bounds how long that process waits for the reply.
%%
%% The temporary process hands its result back as its *exit reason*, which
%% is what the caller picks out of the 'DOWN' message. It must therefore
%% terminate with exit(Result); merely returning from the fun would make
%% the exit reason 'normal' and the actual reply would be lost.
%%
%% Returns the reply from the monitor, {error, timeout} when no reply
%% arrived within Timeout2, {error, {catched, Class, Reason}} when
%% global:send/2 raised, or {error, {timeout, ProcessInfo}} when the
%% temporary process itself did not finish within Timeout1.
call(Req, Timeout1, Timeout2) ->
    F = fun() ->
                Ref = make_ref(),
                Result =
                    try global:send(?NAME, {?MODULE, Ref, self(), Req}) of
                        NamePid when is_pid(NamePid) ->
                            receive
                                {?MODULE, Ref, Rep} ->
                                    Rep
                            after Timeout2 ->
                                    {error, timeout}
                            end
                    catch
                        C:E:_ ->
                            {error, {catched, C, E}}
                    end,
                %% Outside the try, so the exit is not caught above.
                exit(Result)
        end,
    {Pid, Mon} = spawn_monitor(F),
    receive
        {'DOWN', Mon, process, Pid, Result} ->
            Result
    after Timeout1 ->
            %% Grab the process info before killing, for diagnostics.
            PInfo = process_info(Pid),
            exit(Pid, kill),
            %% Drop the monitor and any 'DOWN' already delivered, so no
            %% stale message is left in the caller's mailbox.
            erlang:demonitor(Mon, [flush]),
            {error, {timeout, PInfo}}
    end.



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Log an info report. The text is formatted first and then passed to
%% error_logger as *data*, never as the format string: formatted terms may
%% themselves contain '~', which would otherwise be read as a directive.
info_msg(F, A) ->
    error_logger:info_msg("~ts", [format_msg(F, A)]).

%% Log a warning report; see info_msg/2 for why "~ts" is used.
warning_msg(F, A) ->
    error_logger:warning_msg("~ts", [format_msg(F, A)]).


%% Wrap the format F in the monitor's banner and format it with A,
%% producing a flat string.
format_msg(F, A) ->
    f("~n" ++
          "****** SNMP TEST GLOBAL SYSTEM MONITOR ******~n~n" ++
          F ++
          "~n~n",
      A).