1%% 2%% %CopyrightBegin% 3%% 4%% Copyright Ericsson AB 2021-2021. All Rights Reserved. 5%% 6%% Licensed under the Apache License, Version 2.0 (the "License"); 7%% you may not use this file except in compliance with the License. 8%% You may obtain a copy of the License at 9%% 10%% http://www.apache.org/licenses/LICENSE-2.0 11%% 12%% Unless required by applicable law or agreed to in writing, software 13%% distributed under the License is distributed on an "AS IS" BASIS, 14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15%% See the License for the specific language governing permissions and 16%% limitations under the License. 17%% 18%% %CopyrightEnd% 19%% 20 21-module(kernel_test_global_sys_monitor). 22 23-export([start/0, stop/0, 24 reset_events/0, 25 events/0, events/1, 26 log/1]). 27-export([timestamp/0, format_timestamp/1]). 28-export([init/1]). 29 30-include("kernel_test_lib.hrl"). 31 32-define(NAME, ?MODULE). 33-define(TIMEOUT, timer:seconds(6)). 34 35 36%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 37 38start() -> 39 Parent = self(), 40 proc_lib:start(?MODULE, init, [Parent]). 41 42stop() -> 43 cast(stop). 44 45%% This does not reset the global counter but the "collector" 46%% See events for more info. 47reset_events() -> 48 call(reset_events, ?TIMEOUT). 49 50events() -> 51 events(infinity). 52 53events(Timeout) when (Timeout =:= infinity) orelse 54 (is_integer(Timeout) andalso (Timeout > 0)) -> 55 call(events, Timeout). 56 57log(Event) -> 58 cast({node(), Event}). 59 60 61%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 62 63timestamp() -> 64 erlang:timestamp(). 65 66format_timestamp({_N1, _N2, N3} = TS) -> 67 {Date, Time} = calendar:now_to_local_time(TS), 68 {YYYY,MM,DD} = Date, 69 {Hour,Min,Sec} = Time, 70 FormatDate = 71 io_lib:format("~.4w-~.2.0w-~.2.0w ~.2.0w:~.2.0w:~.2.0w.~.3.0w", 72 [YYYY, MM, DD, Hour, Min, Sec, N3 div 1000]), 73 lists:flatten(FormatDate). 74 75 76%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 77 78init(Parent) -> 79 process_flag(priority, high), 80 case global:register_name(?NAME, self()) of 81 yes -> 82 info_msg("Starting as ~p (on ~p)", [self(), node()]), 83 proc_lib:init_ack(Parent, {ok, self()}), 84 loop(#{parent => Parent, ev_cnt => 0, evs => []}); 85 no -> 86 warning_msg("Already started", []), 87 proc_lib:init_ack(Parent, {error, already_started}), 88 exit(normal) 89 end. 90 91 92%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 93 94loop(State) -> 95 receive 96 {?MODULE, stop} -> 97 warning_msg("Stopping with ~w events counted", 98 [maps:get(ev_cnt, State)]), 99 exit(normal); 100 101 {?MODULE, Ref, From, reset_events} -> 102 TotEvCnt = maps:get(ev_cnt, State), 103 EvCnt = length(maps:get(evs, State)), 104 info_msg("Reset events when" 105 "~n Total Number of Events: ~p" 106 "~n Current Number of Events: ~p", 107 [TotEvCnt, EvCnt]), 108 From ! {?MODULE, Ref, {ok, {TotEvCnt, EvCnt}}}, 109 loop(State#{evs => []}); 110 111 {?MODULE, Ref, From, events} -> 112 Evs = maps:get(evs, State), 113 From ! {?MODULE, Ref, lists:reverse(Evs)}, 114 loop(State); 115 116 {?MODULE, {Node, Event}} -> 117 State2 = process_event(State, Node, Event), 118 loop(State2); 119 120 {nodedown = Event, Node} -> 121 State2 = process_event(State, Node, Event), 122 loop(State2); 123 124 _ -> 125 loop(State) 126 end. 127 128 129%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 130 131process_event(State, Node, {Pid, TS, Tag, Info}) -> 132 process_system_event(State, Node, Pid, TS, Tag, Info); 133 134process_event(State, Node, {TS, starting}) -> 135 FTS = format_timestamp(TS), 136 info_msg("System Monitor starting on node ~p at ~s", [Node, FTS]), 137 if 138 (Node =/= node()) -> 139 erlang:monitor_node(Node, true); 140 true -> 141 ok 142 end, 143 State; 144 145process_event(State, Node, {TS, stopping}) -> 146 FTS = format_timestamp(TS), 147 info_msg("System Monitor stopping on node ~p at ~s", [Node, FTS]), 148 if 149 (Node =/= node()) -> 150 erlang:monitor_node(Node, false); 151 true -> 152 ok 153 end, 154 State; 155 156process_event(State, Node, {TS, already_started}) -> 157 FTS = format_timestamp(TS), 158 info_msg("System Monitor already started on node ~p at ~s", [Node, FTS]), 159 State; 160 161process_event(State, Node, nodedown) -> 162 info_msg("Node ~p down", [Node]), 163 State; 164 165process_event(State, Node, Event) -> 166 warning_msg("Received unknown event from node ~p:" 167 "~n ~p", [Node, Event]), 168 State. 169 170 171%% System Monitor events 172%% We only *count* system events 173process_system_event(#{ev_cnt := Cnt, evs := Evs} = State, 174 Node, Pid, TS, long_gc = Ev, Info) -> 175 print_system_event(f("Long GC (~w)", [length(Evs)]), Node, Pid, TS, Info), 176 State#{ev_cnt => Cnt + 1, evs => [{Node, Ev} | Evs]}; 177process_system_event(#{ev_cnt := Cnt, evs := Evs} = State, 178 Node, Pid, TS, long_schedule = Ev, Info) -> 179 print_system_event(f("Long Schedule (~w)", [length(Evs)]), Node, Pid, TS, Info), 180 State#{ev_cnt => Cnt + 1, evs => [{Node, Ev} | Evs]}; 181process_system_event(#{ev_cnt := Cnt, evs := Evs} = State, 182 Node, Pid, TS, large_heap = Ev, Info) -> 183 print_system_event(f("Large Heap (~w)", [length(Evs)]), Node, Pid, TS, Info), 184 State#{ev_cnt => Cnt + 1, evs => [{Node, Ev} | Evs]}; 185process_system_event(#{ev_cnt := Cnt, evs := Evs} = State, 186 Node, Pid, TS, busy_port = Ev, Info) -> 187 print_system_event(f("Busy port (~w)", [length(Evs)]), Node, Pid, TS, Info), 188 State#{ev_cnt => Cnt + 1, evs => [{Node, Ev} | Evs]}; 189process_system_event(#{ev_cnt := Cnt, evs := Evs} = State, 190 Node, Pid, TS, busy_dist_port = Ev, Info) -> 191 print_system_event(f("Busy dist port (~w)", [length(Evs)]), 192 Node, Pid, TS, Info), 193 State#{ev_cnt => Cnt + 1, evs => [{Node, Ev} | Evs]}; 194 195%% And everything else 196process_system_event(State, Node, Pid, TS, Tag, Info) -> 197 Pre = f("Unknown Event '~p'", [Tag]), 198 print_system_event(Pre, Node, Pid, TS, Info), 199 State. 200 201 202print_system_event(Pre, Node, Pid, TS, Info) -> 203 FTS = format_timestamp(TS), 204 warning_msg("~s from ~p (~p) at ~s:" 205 "~n ~p", [Pre, Node, Pid, FTS, Info]). 206 207f(F, A) -> 208 lists:flatten(io_lib:format(F, A)). 209 210 211%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 212 213cast(Msg) -> 214 try global:send(?NAME, {?MODULE, Msg}) of 215 Pid when is_pid(Pid) -> 216 ok 217 catch 218 C:E:_ -> 219 {error, {catched, C, E}} 220 end. 221 222 223call(Req, Timeout) when (Timeout =:= infinity) -> 224 call(Req, Timeout, Timeout); 225call(Req, Timeout) when is_integer(Timeout) andalso (Timeout > 2000) -> 226 call(Req, Timeout, Timeout - 1000); 227call(Req, Timeout) when is_integer(Timeout) andalso (Timeout > 1000) -> 228 call(Req, Timeout, Timeout - 500); 229call(Req, Timeout) when is_integer(Timeout) -> 230 call(Req, Timeout, Timeout div 2). 231 232%% This peace of wierdness is because on some machines this call has 233%% hung (in a call during end_per_testcase, which had a 1 min timeout, 234%% or if that was the total time for the test case). 235%% But because it hung there, we don't really know what where it git stuck. 236%% So, by making the call in a tmp process, that we supervise, we can 237%% keep control. Also, we change the default timeout from infinity to an 238%% actual time (16 seconds). 239call(Req, Timeout1, Timeout2) -> 240 F = fun() -> 241 Ref = make_ref(), 242 try global:send(?NAME, {?MODULE, Ref, self(), Req}) of 243 NamePid when is_pid(NamePid) -> 244 receive 245 {?MODULE, Ref, Rep} -> 246 exit(Rep) 247 after Timeout2 -> 248 {error, timeout} 249 end 250 catch 251 C:E:_ -> 252 {error, {catched, C, E}} 253 end 254 end, 255 {Pid, Mon} = spawn_monitor(F), 256 receive 257 {'DOWN', Mon, process, Pid, Result} -> 258 Result 259 after Timeout1 -> 260 PInfo = process_info(Pid), 261 exit(Pid, kill), 262 {error, {timeout, PInfo}} 263 end. 264 265 266 267%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 268 269info_msg(F, A) -> 270 error_logger:info_msg(format_msg(F, A), []). 271 272warning_msg(F, A) -> 273 error_logger:warning_msg(format_msg(F, A), []). 274 275 276format_msg(F, A) -> 277 f("~n" ++ 278 "****** KERNEL TEST GLOBAL SYSTEM MONITOR ******~n~n" ++ 279 F ++ 280 "~n~n", 281 A). 282 283