%% This Source Code Form is subject to the terms of the Mozilla Public
%% License, v. 2.0. If a copy of the MPL was not distributed with this
%% file, You can obtain one at https://mozilla.org/MPL/2.0/.
%%
%% Copyright (c) 2007-2021 VMware, Inc. or its affiliates. All rights reserved.
%%

-module(rabbit_diagnostics).

%% Items requested from process_info/2 when printing a process report.
-define(PROCESS_INFO,
        [registered_name, current_stacktrace, initial_call, message_queue_len,
         links, monitors, monitored_by, heap_size]).

-export([maybe_stuck/0, maybe_stuck/1, top_memory_use/0, top_memory_use/1,
         top_binary_refs/0, top_binary_refs/1]).

%% Scans for processes that look stuck, using a 5000 ms budget.
maybe_stuck() -> maybe_stuck(5000).

%% Scans every process of the node for `Timeout' milliseconds and prints a
%% report for those that looked stuck in every sampling round.
maybe_stuck(Timeout) ->
    AllPids = processes(),
    io:format("~s There are ~p processes.~n", [get_time(), length(AllPids)]),
    maybe_stuck(AllPids, Timeout).

%% Sampling loop: every round keeps only the pids that still look stuck,
%% sleeps 500 ms and takes 500 ms off the budget. Once the budget is used
%% up, the surviving pids are printed together with their process info.
maybe_stuck(Pids, Timeout) when Timeout =< 0 ->
    io:format("~s Found ~p suspicious processes.~n", [get_time(), length(Pids)]),
    lists:foreach(
      fun (Pid) ->
              io:format("~s ~p~n", [get_time(), info(Pid)])
      end, Pids),
    ok;
maybe_stuck(Pids, Timeout) ->
    Suspects = lists:filter(fun looks_stuck/1, Pids),
    io:format("~s Investigated ~p processes this round, ~pms to go.~n",
              [get_time(), length(Suspects), Timeout]),
    timer:sleep(500),
    maybe_stuck(Suspects, Timeout - 500).

%% A process looks stuck when its status is `waiting' and the top frame of
%% its current stack trace is not one of the known idle spots.
looks_stuck(Pid) ->
    case info(Pid, status, gone) of
        {status, waiting} ->
            %% It's tempting to just check for message_queue_len > 0
            %% here rather than mess around with stack traces and
            %% heuristics. But really, sometimes freshly stuck
            %% processes can have 0 messages...
            top_frame_looks_stuck(info(Pid, current_stacktrace, gone));
        _ ->
            false
    end.

%% Applies the stack-frame heuristic to the top frame, if there is one.
top_frame_looks_stuck({current_stacktrace, [TopFrame | _]}) ->
    maybe_stuck_stacktrace(TopFrame);
top_frame_looks_stuck(_) ->
    false.

%% Heuristic on a single stack frame of a waiting process: returns `false'
%% when the frame is a known idle spot (listed below) or when its function
%% name contains "loop", and `true' otherwise.
maybe_stuck_stacktrace({gen_server2,      process_next_msg, _}) -> false;
maybe_stuck_stacktrace({gen_event,        fetch_msg,        _}) -> false;
maybe_stuck_stacktrace({prim_inet,        accept0,          _}) -> false;
maybe_stuck_stacktrace({prim_inet,        recv0,            _}) -> false;
maybe_stuck_stacktrace({rabbit_heartbeat, heartbeater,      _}) -> false;
maybe_stuck_stacktrace({rabbit_net,       recv,             _}) -> false;
maybe_stuck_stacktrace({group,            _,                _}) -> false;
maybe_stuck_stacktrace({shell,            _,                _}) -> false;
maybe_stuck_stacktrace({io,               _,                _}) -> false;
%% Four-element frames carry a trailing element that is dropped here so the
%% three-element clauses above can be reused.
maybe_stuck_stacktrace({M, F, A, _}) ->
    maybe_stuck_stacktrace({M, F, A});
maybe_stuck_stacktrace({_M, F, _A}) ->
    %% string:find/2 replaces the obsolete string:str/2.
    case string:find(atom_to_list(F), "loop") of
        nomatch -> true;
        _       -> false
    end.

%% Prints the 30 processes using the most memory.
top_memory_use() -> top_memory_use(30).

%% Prints the `Count' processes with the largest `memory' figure, each
%% together with its process info.
top_memory_use(Count) ->
    Pids = processes(),
    io:format("~s Memory use: top ~p of ~p processes.~n", [get_time(), Count, length(Pids)]),
    Procs = [{info(Pid, memory, 0), info(Pid)} || Pid <- Pids],
    Sorted = lists:sublist(lists:reverse(lists:sort(Procs)), Count),
    io:format("~s ~p~n", [get_time(), Sorted]).

%% Prints the 30 processes with the largest binary reference totals.
top_binary_refs() -> top_binary_refs(30).

%% Prints the `Count' processes with the largest binary_refs/1 totals, each
%% together with its process info.
top_binary_refs(Count) ->
    Pids = processes(),
    io:format("~s Binary refs: top ~p of ~p processes.~n", [get_time(), Count, length(Pids)]),
    Procs = [{{binary_refs, binary_refs(Pid)}, info(Pid)} || Pid <- Pids],
    Sorted = lists:sublist(lists:reverse(lists:sort(Procs)), Count),
    io:format("~s ~p~n", [get_time(), Sorted]).

%% Sums the sizes reported by the `binary' process_info item for `Pid',
%% counting each distinct {Ptr, Size} pair once. Returns 0 when the item
%% cannot be obtained.
binary_refs(Pid) ->
    case info(Pid, binary, []) of
        {binary, Refs} ->
            Unique = lists:usort([{Ptr, Sz} || {Ptr, Sz, _Cnt} <- Refs]),
            lists:sum([Sz || {_Ptr, Sz} <- Unique]);
        _ ->
            0
    end.

%% Returns the ?PROCESS_INFO items of `Pid' with `{pid, Pid}' prepended.
%% For a process that is gone the result is just `[{pid, Pid}]'.
info(Pid) ->
    [{pid, Pid} | info(Pid, ?PROCESS_INFO, [])].

%% Wrapper around process_info/2 that never fails.
%%
%% `Infos' is either a single item (an atom) or a list of items. When the
%% information cannot be obtained, the result is `{Infos, Default}' for a
%% single item and `Default' for a list.
%%
%% process_info/2 returns `undefined' for a process that is not alive
%% instead of raising, so that value is mapped to the default as well.
%% Without this, info/1 would build the improper list
%% `[{pid, Pid} | undefined]' for a process that exited mid-scan.
info(Pid, Infos, Default) ->
    try process_info(Pid, Infos) of
        undefined -> default_info(Infos, Default);
        Info      -> Info
    catch
        _:_ -> default_info(Infos, Default)
    end.

%% Shapes the fallback value like a successful process_info/2 result.
default_info(Item, Default) when is_atom(Item) -> {Item, Default};
default_info(_Items, Default)                  -> Default.

%% Returns the current local time as an iolist of the form
%% "YYYY-MM-DD HH:MM:SS", suitable for the ~s format directive.
get_time() ->
    {{Year, Month, Day}, {Hour, Minute, Second}} = calendar:local_time(),
    Date = lists:join("-", [integer_to_list(Year),
                            two_digits(Month),
                            two_digits(Day)]),
    Time = lists:join(":", [two_digits(Hour),
                            two_digits(Minute),
                            two_digits(Second)]),
    Date ++ [" " | Time].

%% Renders an integer as a decimal string, zero-padded to two characters.
two_digits(N) ->
    prefix_zero(integer_to_list(N)).

%% Left-pads a one-character string with "0"; a two-character string is
%% returned unchanged. Any other length is a function_clause error.
prefix_zero([_, _] = TwoChars) -> TwoChars;
prefix_zero([OneChar])         -> [$0 | [OneChar]].