1package command
2
3import (
4	"fmt"
5	"strconv"
6	"strings"
7	"time"
8
9	"github.com/posener/complete"
10)
11
// Exit codes returned by the check command, listed in ascending numeric
// order. The command's help text describes the exit code as Nagios
// compatible.
const (
	// HealthPass is returned when every health rule is satisfied.
	HealthPass = 0
	// HealthWarn is returned when the agent reports neither server nor
	// client statistics.
	HealthWarn = 1
	// HealthCritical is returned when the agent cannot be queried or a
	// health rule is violated.
	HealthCritical = 2
	// HealthUnknown is declared for completeness; nothing in this file
	// returns it.
	HealthUnknown = 3
)
18
19type AgentCheckCommand struct {
20	Meta
21}
22
23func (c *AgentCheckCommand) Help() string {
24	helpText := `
25Usage: nomad check [options]
26
27  Display state of the Nomad agent. The exit code of the command is Nagios
28  compatible and could be used with alerting systems.
29
30General Options:
31
32  ` + generalOptionsUsage() + `
33
34Agent Check Options:
35
36  -min-peers
37     Minimum number of peers that a server is expected to know.
38
39  -min-servers
40     Minimum number of servers that a client is expected to know.
41`
42
43	return strings.TrimSpace(helpText)
44}
45
46func (c *AgentCheckCommand) Synopsis() string {
47	return "Displays health of the local Nomad agent"
48}
49
50func (c *AgentCheckCommand) Name() string { return "check" }
51
52func (c *AgentCheckCommand) Run(args []string) int {
53	var minPeers, minServers int
54
55	flags := c.Meta.FlagSet("check", FlagSetClient)
56	flags.Usage = func() { c.Ui.Output(c.Help()) }
57	flags.IntVar(&minPeers, "min-peers", 0, "")
58	flags.IntVar(&minServers, "min-servers", 1, "")
59
60	if err := flags.Parse(args); err != nil {
61		return 1
62	}
63
64	args = flags.Args()
65	if len(args) > 0 {
66		c.Ui.Error("This command takes no arguments")
67		c.Ui.Error(commandErrorText(c))
68		return 1
69	}
70
71	client, err := c.Meta.Client()
72	if err != nil {
73		c.Ui.Error(fmt.Sprintf("error initializing client: %s", err))
74		return HealthCritical
75	}
76
77	info, err := client.Agent().Self()
78	if err != nil {
79		c.Ui.Output(fmt.Sprintf("unable to query agent info: %v", err))
80		return HealthCritical
81	}
82	if _, ok := info.Stats["nomad"]; ok {
83		return c.checkServerHealth(info.Stats, minPeers)
84	}
85
86	if clientStats, ok := info.Stats["client"]; ok {
87		return c.checkClientHealth(clientStats, minServers)
88	}
89	return HealthWarn
90}
91
92// checkServerHealth returns the health of a server.
93// TODO Add more rules for determining server health
94func (c *AgentCheckCommand) checkServerHealth(info map[string]map[string]string, minPeers int) int {
95	raft := info["raft"]
96	knownPeers, err := strconv.Atoi(raft["num_peers"])
97	if err != nil {
98		c.Ui.Output(fmt.Sprintf("unable to get known peers: %v", err))
99		return HealthCritical
100	}
101
102	if knownPeers < minPeers {
103		c.Ui.Output(fmt.Sprintf("known peers: %v, is less than expected number of peers: %v", knownPeers, minPeers))
104		return HealthCritical
105	}
106	return HealthPass
107}
108
109// checkClientHealth returns the health of a client
110func (c *AgentCheckCommand) checkClientHealth(clientStats map[string]string, minServers int) int {
111	knownServers, err := strconv.Atoi(clientStats["known_servers"])
112	if err != nil {
113		c.Ui.Output(fmt.Sprintf("unable to get known servers: %v", err))
114		return HealthCritical
115	}
116
117	heartbeatTTL, err := time.ParseDuration(clientStats["heartbeat_ttl"])
118	if err != nil {
119		c.Ui.Output(fmt.Sprintf("unable to parse heartbeat TTL: %v", err))
120		return HealthCritical
121	}
122
123	lastHeartbeat, err := time.ParseDuration(clientStats["last_heartbeat"])
124	if err != nil {
125		c.Ui.Output(fmt.Sprintf("unable to parse last heartbeat: %v", err))
126		return HealthCritical
127	}
128
129	if lastHeartbeat > heartbeatTTL {
130		c.Ui.Output(fmt.Sprintf("last heartbeat was %q time ago, expected heartbeat ttl: %q", lastHeartbeat, heartbeatTTL))
131		return HealthCritical
132	}
133
134	if knownServers < minServers {
135		c.Ui.Output(fmt.Sprintf("known servers: %v, is less than expected number of servers: %v", knownServers, minServers))
136		return HealthCritical
137	}
138
139	return HealthPass
140}
141
142func (c *AgentCheckCommand) AutocompleteFlags() complete.Flags {
143	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
144		complete.Flags{
145			"-min-peers":   complete.PredictAnything,
146			"-min-servers": complete.PredictAnything,
147		})
148}
149
150func (c *AgentCheckCommand) AutocompleteArgs() complete.Predictor {
151	return complete.PredictNothing
152}
153