1## This Source Code Form is subject to the terms of the Mozilla Public
2## License, v. 2.0. If a copy of the MPL was not distributed with this
3## file, You can obtain one at https://mozilla.org/MPL/2.0/.
4##
5## Copyright (c) 2007-2021 VMware, Inc. or its affiliates.  All rights reserved.
6
defmodule RabbitMQ.CLI.Diagnostics.Commands.CheckAlarmsCommand do
  @moduledoc """
  Exits with a non-zero code if the target node reports any alarms,
  local or clusterwide.

  This command is meant to be used in health checks.
  """

  import RabbitMQ.CLI.Core.Alarms
  import RabbitMQ.CLI.Core.Platform, only: [line_separator: 0]

  @behaviour RabbitMQ.CLI.CommandBehaviour

  use RabbitMQ.CLI.Core.AcceptsDefaultSwitchesAndTimeout
  use RabbitMQ.CLI.Core.MergesNoDefaults
  use RabbitMQ.CLI.Core.AcceptsNoPositionalArguments
  use RabbitMQ.CLI.Core.RequiresRabbitAppRunning

  # Asks the target node for the alarms it currently knows about and returns
  # the RPC result unchanged; interpreting it is left to output/2.
  def run([], %{node: node_name, timeout: timeout}) do
    # Example response when there are alarms:
    #
    # [
    #  file_descriptor_limit,
    #  {{resource_limit,disk,hare@warp10},[]},
    #  {{resource_limit,memory,hare@warp10},[]},
    #  {{resource_limit,disk,rabbit@warp10},[]},
    #  {{resource_limit,memory,rabbit@warp10},[]}
    # ]
    #
    # The topmost file_descriptor_limit alarm is node-local.
    :rabbit_misc.rpc_call(node_name, :rabbit_alarm, :get_alarms, [], timeout)
  end

  # output/2 turns the result of run/2 into the command's outcome.
  #
  # An empty alarm list means the check passed. The JSON clause comes first,
  # so a JSON formatter takes precedence over the silent flag.
  def output([], %{formatter: "json"}) do
    {:ok, %{"result" => "ok"}}
  end

  def output([], %{silent: true}) do
    {:ok, :check_passed}
  end

  def output([], %{node: node_name}) do
    {:ok, "Node #{node_name} reported no alarms, local or clusterwide"}
  end

  # A non-empty alarm list means the check failed.
  #
  # Every clause below is guarded with is_list/1 so that a non-list result
  # (for example an RPC error tuple) is never fed to the alarm helpers.
  # NOTE(review): such results are presumably handled by the fallback clauses
  # that `use RabbitMQ.CLI.DefaultOutput` adds after these ones -- confirm.
  def output(alarms, %{node: node_name, formatter: "json"}) when is_list(alarms) do
    local = local_alarms(alarms, node_name)
    global = clusterwide_alarms(alarms, node_name)

    {:error, :check_failed,
     %{
       "result" => "error",
       "local" => alarm_lines(local, node_name),
       "global" => alarm_lines(global, node_name),
       "message" => "Node #{node_name} reported alarms"
     }}
  end

  def output(alarms, %{silent: true}) when is_list(alarms) do
    {:error, :check_failed}
  end

  def output(alarms, %{node: node_name}) when is_list(alarms) do
    message =
      alarms
      |> alarm_lines(node_name)
      |> Enum.join(line_separator())

    {:error, :check_failed, message}
  end

  # Must stay below the clauses above: function clauses are tried in
  # definition order, so anything this adds to output/2 is only reached
  # when none of the command-specific clauses match.
  use RabbitMQ.CLI.DefaultOutput

  def help_section(), do: :observability_and_health_checks

  def description() do
    "Health check that exits with a non-zero code if the target node reports any alarms, local or cluster-wide."
  end

  def usage(), do: "check_alarms"

  # The node reports both its own and clusterwide alarms (see the sample
  # response in run/2), so the banner must not claim the check is local-only.
  def banner([], %{node: node_name}) do
    "Asking node #{node_name} to report any resource alarms, local or clusterwide ..."
  end
end
87