1package autopilot
2
3import (
4	"time"
5
6	"github.com/hashicorp/serf/serf"
7)
8
// Config holds the Autopilot configuration for a cluster.
type Config struct {
	// CleanupDeadServers controls whether to remove dead servers when a new
	// server is added to the Raft peers.
	CleanupDeadServers bool

	// LastContactThreshold is the maximum amount of time a server can go
	// without leader contact before being considered unhealthy.
	LastContactThreshold time.Duration

	// MaxTrailingLogs is the number of entries in the Raft log that a server
	// can be behind the leader before being considered unhealthy.
	MaxTrailingLogs uint64

	// MinQuorum sets the minimum number of servers required in a cluster
	// before autopilot can prune dead servers.
	MinQuorum uint

	// ServerStabilizationTime is the minimum amount of time a server must be
	// in a stable, healthy state before it can be added to the cluster. Only
	// applicable with Raft protocol version 3 or higher.
	ServerStabilizationTime time.Duration

	// RedundancyZoneTag (Enterprise-only) is the node tag to use for
	// separating servers into zones for redundancy. If left blank, this
	// feature is disabled.
	RedundancyZoneTag string

	// DisableUpgradeMigration (Enterprise-only) disables Autopilot's upgrade
	// migration strategy of waiting until enough newer-versioned servers have
	// been added to the cluster before promoting them to voters.
	DisableUpgradeMigration bool

	// UpgradeVersionTag (Enterprise-only) is the node tag to use for version
	// info when performing upgrade migrations. If left blank, the Consul
	// version is used.
	UpgradeVersionTag string

	// CreateIndex and ModifyIndex store the create and modify indexes of this
	// configuration, respectively.
	CreateIndex uint64
	ModifyIndex uint64
}
49
// ServerHealth is the health (from the leader's point of view) of a server.
type ServerHealth struct {
	// ID is the raft ID of the server.
	ID string

	// Name is the node name of the server.
	Name string

	// Address is the address of the server.
	Address string

	// SerfStatus is the status of the SerfHealth check for the server.
	// IsHealthy requires it to be serf.StatusAlive.
	SerfStatus serf.MemberStatus

	// Version is the version of the server.
	Version string

	// Leader is whether this server is currently the leader.
	Leader bool

	// LastContact is the time since this node's last contact with the leader.
	// IsHealthy treats a negative value as unhealthy.
	LastContact time.Duration

	// LastTerm is the highest leader term this server has a record of in its
	// Raft log.
	LastTerm uint64

	// LastIndex is the last log index this server has a record of in its Raft
	// log.
	LastIndex uint64

	// Healthy is whether or not the server is healthy according to the current
	// Autopilot config.
	Healthy bool

	// Voter is whether this is a voting server.
	Voter bool

	// StableSince is the last time this server's Healthy value changed.
	// IsStable measures the stabilization period from this instant.
	StableSince time.Time
}
89
90// IsHealthy determines whether this ServerHealth is considered healthy
91// based on the given Autopilot config
92func (h *ServerHealth) IsHealthy(lastTerm uint64, leaderLastIndex uint64, autopilotConf *Config) bool {
93	if h.SerfStatus != serf.StatusAlive {
94		return false
95	}
96
97	if h.LastContact > autopilotConf.LastContactThreshold || h.LastContact < 0 {
98		return false
99	}
100
101	if h.LastTerm != lastTerm {
102		return false
103	}
104
105	if leaderLastIndex > autopilotConf.MaxTrailingLogs && h.LastIndex < leaderLastIndex-autopilotConf.MaxTrailingLogs {
106		return false
107	}
108
109	return true
110}
111
112// IsStable returns true if the ServerHealth shows a stable, passing state
113// according to the given AutopilotConfig
114func (h *ServerHealth) IsStable(now time.Time, conf *Config) bool {
115	if h == nil {
116		return false
117	}
118
119	if !h.Healthy {
120		return false
121	}
122
123	if now.Sub(h.StableSince) < conf.ServerStabilizationTime {
124		return false
125	}
126
127	return true
128}
129
// ServerStats holds miscellaneous Raft metrics for a server.
type ServerStats struct {
	// LastContact is the time since this node's last contact with the leader,
	// held as a string.
	// NOTE(review): the exact string format is not visible in this file —
	// confirm against the producer before parsing it.
	LastContact string

	// LastTerm is the highest leader term this server has a record of in its
	// Raft log.
	LastTerm uint64

	// LastIndex is the last log index this server has a record of in its Raft
	// log.
	LastIndex uint64
}
141
// OperatorHealthReply is a representation of the overall health of the
// cluster.
type OperatorHealthReply struct {
	// Healthy is true if all the servers in the cluster are healthy.
	Healthy bool

	// FailureTolerance is the number of healthy servers that could be lost
	// without an outage occurring.
	FailureTolerance int

	// Servers holds the health of each server.
	Servers []ServerHealth
}
154
155func (o *OperatorHealthReply) ServerHealth(id string) *ServerHealth {
156	for _, health := range o.Servers {
157		if health.ID == id {
158			return &health
159		}
160	}
161	return nil
162}
163