1package autopilot 2 3import ( 4 "time" 5 6 "github.com/hashicorp/serf/serf" 7) 8 9// Config holds the Autopilot configuration for a cluster. 10type Config struct { 11 // CleanupDeadServers controls whether to remove dead servers when a new 12 // server is added to the Raft peers. 13 CleanupDeadServers bool 14 15 // LastContactThreshold is the limit on the amount of time a server can go 16 // without leader contact before being considered unhealthy. 17 LastContactThreshold time.Duration 18 19 // MaxTrailingLogs is the amount of entries in the Raft Log that a server can 20 // be behind before being considered unhealthy. 21 MaxTrailingLogs uint64 22 23 // MinQuorum sets the minimum number of servers required in a cluster 24 // before autopilot can prune dead servers. 25 MinQuorum uint 26 27 // ServerStabilizationTime is the minimum amount of time a server must be 28 // in a stable, healthy state before it can be added to the cluster. Only 29 // applicable with Raft protocol version 3 or higher. 30 ServerStabilizationTime time.Duration 31 32 // (Enterprise-only) RedundancyZoneTag is the node tag to use for separating 33 // servers into zones for redundancy. If left blank, this feature will be disabled. 34 RedundancyZoneTag string 35 36 // (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration 37 // strategy of waiting until enough newer-versioned servers have been added to the 38 // cluster before promoting them to voters. 39 DisableUpgradeMigration bool 40 41 // (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when 42 // performing upgrade migrations. If left blank, the Consul version will be used. 43 UpgradeVersionTag string 44 45 // CreateIndex/ModifyIndex store the create/modify indexes of this configuration. 46 CreateIndex uint64 47 ModifyIndex uint64 48} 49 50// ServerHealth is the health (from the leader's point of view) of a server. 51type ServerHealth struct { 52 // ID is the raft ID of the server. 53 ID string 54 55 // Name is the node name of the server. 56 Name string 57 58 // Address is the address of the server. 59 Address string 60 61 // The status of the SerfHealth check for the server. 62 SerfStatus serf.MemberStatus 63 64 // Version is the version of the server. 65 Version string 66 67 // Leader is whether this server is currently the leader. 68 Leader bool 69 70 // LastContact is the time since this node's last contact with the leader. 71 LastContact time.Duration 72 73 // LastTerm is the highest leader term this server has a record of in its Raft log. 74 LastTerm uint64 75 76 // LastIndex is the last log index this server has a record of in its Raft log. 77 LastIndex uint64 78 79 // Healthy is whether or not the server is healthy according to the current 80 // Autopilot config. 81 Healthy bool 82 83 // Voter is whether this is a voting server. 84 Voter bool 85 86 // StableSince is the last time this server's Healthy value changed. 87 StableSince time.Time 88} 89 90// IsHealthy determines whether this ServerHealth is considered healthy 91// based on the given Autopilot config 92func (h *ServerHealth) IsHealthy(lastTerm uint64, leaderLastIndex uint64, autopilotConf *Config) bool { 93 if h.SerfStatus != serf.StatusAlive { 94 return false 95 } 96 97 if h.LastContact > autopilotConf.LastContactThreshold || h.LastContact < 0 { 98 return false 99 } 100 101 if h.LastTerm != lastTerm { 102 return false 103 } 104 105 if leaderLastIndex > autopilotConf.MaxTrailingLogs && h.LastIndex < leaderLastIndex-autopilotConf.MaxTrailingLogs { 106 return false 107 } 108 109 return true 110} 111 112// IsStable returns true if the ServerHealth shows a stable, passing state 113// according to the given AutopilotConfig 114func (h *ServerHealth) IsStable(now time.Time, conf *Config) bool { 115 if h == nil { 116 return false 117 } 118 119 if !h.Healthy { 120 return false 121 } 122 123 if now.Sub(h.StableSince) < conf.ServerStabilizationTime { 124 return false 125 } 126 127 return true 128} 129 130// ServerStats holds miscellaneous Raft metrics for a server 131type ServerStats struct { 132 // LastContact is the time since this node's last contact with the leader. 133 LastContact string 134 135 // LastTerm is the highest leader term this server has a record of in its Raft log. 136 LastTerm uint64 137 138 // LastIndex is the last log index this server has a record of in its Raft log. 139 LastIndex uint64 140} 141 142// OperatorHealthReply is a representation of the overall health of the cluster 143type OperatorHealthReply struct { 144 // Healthy is true if all the servers in the cluster are healthy. 145 Healthy bool 146 147 // FailureTolerance is the number of healthy servers that could be lost without 148 // an outage occurring. 149 FailureTolerance int 150 151 // Servers holds the health of each server. 152 Servers []ServerHealth 153} 154 155func (o *OperatorHealthReply) ServerHealth(id string) *ServerHealth { 156 for _, health := range o.Servers { 157 if health.ID == id { 158 return &health 159 } 160 } 161 return nil 162} 163