## This Source Code Form is subject to the terms of the Mozilla Public
## License, v. 2.0. If a copy of the MPL was not distributed with this
## file, You can obtain one at https://mozilla.org/MPL/2.0/.
##
## Copyright (c) 2007-2021 VMware, Inc. or its affiliates. All rights reserved.

defmodule RabbitMQ.CLI.Diagnostics.Commands.CheckAlarmsCommand do
  @moduledoc """
  Exits with a non-zero code if the target node reports any alarms,
  local or clusterwide.

  This command is meant to be used in health checks.
  """

  import RabbitMQ.CLI.Core.Alarms
  import RabbitMQ.CLI.Core.Platform, only: [line_separator: 0]

  @behaviour RabbitMQ.CLI.CommandBehaviour

  use RabbitMQ.CLI.Core.AcceptsDefaultSwitchesAndTimeout
  use RabbitMQ.CLI.Core.MergesNoDefaults
  use RabbitMQ.CLI.Core.AcceptsNoPositionalArguments
  use RabbitMQ.CLI.Core.RequiresRabbitAppRunning

  # Asks the target node for its current alarms via
  # `:rabbit_alarm.get_alarms/0` and returns the RPC result unchanged.
  # Takes no positional arguments; `node` and `timeout` come from the options.
  def run([], %{node: node_name, timeout: timeout}) do
    # Example response when there are alarms:
    #
    # [
    #  file_descriptor_limit,
    #  {{resource_limit,disk,hare@warp10},[]},
    #  {{resource_limit,memory,hare@warp10},[]},
    #  {{resource_limit,disk,rabbit@warp10},[]},
    #  {{resource_limit,memory,rabbit@warp10},[]}
    # ]
    #
    # The topmost file_descriptor_limit alarm is node-local.
    :rabbit_misc.rpc_call(node_name, :rabbit_alarm, :get_alarms, [], timeout)
  end

  # --- Success: an empty alarm list means the check passed. ---
  # Clause order matters: the JSON formatter takes precedence over `silent`,
  # which in turn takes precedence over the plain-text message.

  def output([], %{formatter: "json"}) do
    {:ok, %{"result" => "ok"}}
  end

  def output([], %{silent: true}) do
    {:ok, :check_passed}
  end

  def output([], %{node: node_name}) do
    {:ok, "Node #{node_name} reported no alarms, local or clusterwide"}
  end

  # --- Failure: a non-empty alarm list means the check failed. ---
  # Every failure clause is guarded with `is_list/1` so that a non-list result
  # from `run/2` (e.g. an RPC error term) is not treated as a list of alarms
  # and instead reaches the `output/2` fallback pulled in by
  # `use RabbitMQ.CLI.DefaultOutput` below.
  # NOTE(review): the exact handling of such terms lives in DefaultOutput,
  # which is not visible here — confirm.

  # JSON output splits the alarms into those local to the target node and the
  # clusterwide ones, rendering each group with `alarm_lines/2`.
  def output(alarms, %{node: node_name, formatter: "json"}) when is_list(alarms) do
    local = local_alarms(alarms, node_name)
    global = clusterwide_alarms(alarms, node_name)

    {:error, :check_failed,
     %{
       "result" => "error",
       "local" => alarm_lines(local, node_name),
       "global" => alarm_lines(global, node_name),
       "message" => "Node #{node_name} reported alarms"
     }}
  end

  # Silent mode reports the failure without any message.
  def output(alarms, %{silent: true}) when is_list(alarms) do
    {:error, :check_failed}
  end

  # Plain-text output: one rendered line per alarm, joined with the
  # platform-specific line separator.
  def output(alarms, %{node: node_name}) when is_list(alarms) do
    lines = alarm_lines(alarms, node_name)

    {:error, :check_failed, Enum.join(lines, line_separator())}
  end

  # Must come after the clauses above so that they are tried first.
  use RabbitMQ.CLI.DefaultOutput

  def help_section(), do: :observability_and_health_checks

  def description(),
    do:
      "Health check that exits with a non-zero code if the target node reports any alarms, local or cluster-wide."

  def usage, do: "check_alarms"

  def banner([], %{node: node_name}) do
    "Asking node #{node_name} to report any local resource alarms ..."
  end
end