1package api
2
3import (
4	"bytes"
5	"fmt"
6	"io"
7	"strconv"
8	"strings"
9	"time"
10)
11
// AutopilotConfiguration is used for querying/setting the Autopilot configuration.
// Autopilot helps manage operator tasks related to Consul servers like removing
// failed servers from the Raft quorum.
//
// It is the value decoded by AutopilotGetConfiguration and the object attached
// to the request by AutopilotSetConfiguration and AutopilotCASConfiguration.
type AutopilotConfiguration struct {
	// CleanupDeadServers controls whether to remove dead servers from the Raft
	// peer list when a new server joins.
	CleanupDeadServers bool

	// LastContactThreshold is the limit on the amount of time a server can go
	// without leader contact before being considered unhealthy. It is encoded
	// in JSON as a human-readable duration string (see ReadableDuration).
	LastContactThreshold *ReadableDuration

	// MaxTrailingLogs is the number of entries in the Raft log that a server can
	// be behind before being considered unhealthy.
	MaxTrailingLogs uint64

	// MinQuorum sets the minimum number of servers allowed in a cluster before
	// autopilot can prune dead servers.
	MinQuorum uint

	// ServerStabilizationTime is the minimum amount of time a server must be
	// in a stable, healthy state before it can be added to the cluster. Only
	// applicable with Raft protocol version 3 or higher. It is encoded in JSON
	// as a human-readable duration string (see ReadableDuration).
	ServerStabilizationTime *ReadableDuration

	// (Enterprise-only) RedundancyZoneTag is the node tag to use for separating
	// servers into zones for redundancy. If left blank, this feature will be disabled.
	RedundancyZoneTag string

	// (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration
	// strategy of waiting until enough newer-versioned servers have been added to the
	// cluster before promoting them to voters.
	DisableUpgradeMigration bool

	// (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when
	// performing upgrade migrations. If left blank, the Consul version will be used.
	UpgradeVersionTag string

	// CreateIndex holds the index corresponding to the creation of this configuration.
	// This is a read-only field.
	CreateIndex uint64

	// ModifyIndex will be set to the index of the last update when retrieving the
	// Autopilot configuration. Resubmitting a configuration with
	// AutopilotCASConfiguration will perform a check-and-set operation which ensures
	// there hasn't been a subsequent update since the configuration was retrieved.
	// AutopilotCASConfiguration sends this value as the "cas" request parameter.
	ModifyIndex uint64
}
60
// ServerHealth is the health (from the leader's point of view) of a server.
// Values of this type are carried in the Servers slice of OperatorHealthReply.
type ServerHealth struct {
	// ID is the raft ID of the server.
	ID string

	// Name is the node name of the server.
	Name string

	// Address is the address of the server.
	Address string

	// SerfStatus is the status of the SerfHealth check for the server.
	SerfStatus string

	// Version is the Consul version of the server.
	Version string

	// Leader is whether this server is currently the leader.
	Leader bool

	// LastContact is the time since this node's last contact with the leader.
	// It is encoded in JSON as a human-readable duration string (see
	// ReadableDuration).
	LastContact *ReadableDuration

	// LastTerm is the highest leader term this server has a record of in its Raft log.
	LastTerm uint64

	// LastIndex is the last log index this server has a record of in its Raft log.
	LastIndex uint64

	// Healthy is whether or not the server is healthy according to the current
	// Autopilot config.
	Healthy bool

	// Voter is whether this is a voting server.
	Voter bool

	// StableSince is the last time this server's Healthy value changed.
	StableSince time.Time
}
100
// OperatorHealthReply is a representation of the overall health of the cluster.
// It is the value that AutopilotServerHealth decodes from the response of the
// /v1/operator/autopilot/health endpoint.
type OperatorHealthReply struct {
	// Healthy is true if all the servers in the cluster are healthy.
	Healthy bool

	// FailureTolerance is the number of healthy servers that could be lost without
	// an outage occurring.
	FailureTolerance int

	// Servers holds the health of each server.
	Servers []ServerHealth
}
113
// AutopilotState is the value that Operator.AutopilotState decodes from the
// response of the /v1/operator/autopilot/state endpoint.
//
// ReadReplicas, RedundancyZones and Upgrade are tagged omitempty, so they are
// left out of the JSON encoding when empty (or nil, for Upgrade).
//
// NOTE(review): this file does not document the field semantics. Healthy and
// FailureTolerance presumably mean the same as in OperatorHealthReply; the
// keys of Servers and the entries of Leader, Voters and ReadReplicas are
// presumably server IDs; RedundancyZones is presumably keyed by zone name.
// Confirm against the Consul API documentation.
type AutopilotState struct {
	// Cluster-wide summary values.
	Healthy                    bool
	FailureTolerance           int
	OptimisticFailureTolerance int

	// Per-server details and groupings of servers.
	Servers         map[string]AutopilotServer
	Leader          string
	Voters          []string
	ReadReplicas    []string                 `json:",omitempty"`
	RedundancyZones map[string]AutopilotZone `json:",omitempty"`
	Upgrade         *AutopilotUpgrade        `json:",omitempty"`
}
126
// AutopilotServer describes a single server; it is the value type of the
// Servers map in AutopilotState.
//
// LastContact is encoded in JSON as a human-readable duration string (see
// ReadableDuration). RedundancyZone and UpgradeVersion are tagged omitempty
// and are dropped from the JSON encoding when they are the empty string.
// Status and NodeType use the AutopilotServerStatus and AutopilotServerType
// string types, whose known values are declared as constants in this file.
//
// NOTE(review): ID, Name, Address, Version, LastContact, LastTerm, LastIndex,
// Healthy and StableSince share their names with fields of ServerHealth and
// presumably carry the same meaning. The semantics of NodeStatus, ReadReplica
// and Meta are not shown in this file; confirm against the Consul API
// documentation.
type AutopilotServer struct {
	ID             string
	Name           string
	Address        string
	NodeStatus     string
	Version        string
	LastContact    *ReadableDuration
	LastTerm       uint64
	LastIndex      uint64
	Healthy        bool
	StableSince    time.Time
	RedundancyZone string `json:",omitempty"`
	UpgradeVersion string `json:",omitempty"`
	ReadReplica    bool
	Status         AutopilotServerStatus
	Meta           map[string]string
	NodeType       AutopilotServerType
}
145
// AutopilotServerStatus is the string type of the Status field of
// AutopilotServer.
type AutopilotServerStatus string

// Values of AutopilotServerStatus declared by this package, with the literal
// strings they carry.
// NOTE(review): the exact conditions under which each status is reported are
// not shown in this file; confirm against the Consul API documentation.
const (
	AutopilotServerNone     AutopilotServerStatus = "none"
	AutopilotServerLeader   AutopilotServerStatus = "leader"
	AutopilotServerVoter    AutopilotServerStatus = "voter"
	AutopilotServerNonVoter AutopilotServerStatus = "non-voter"
	AutopilotServerStaging  AutopilotServerStatus = "staging"
)
155
// AutopilotServerType is the string type of the NodeType field of
// AutopilotServer.
type AutopilotServerType string

// Values of AutopilotServerType declared by this package, with the literal
// strings they carry.
// NOTE(review): the zone-* values presumably relate to redundancy zones (see
// AutopilotZone); their precise meaning is not shown in this file. Confirm
// against the Consul API documentation.
const (
	AutopilotTypeVoter          AutopilotServerType = "voter"
	AutopilotTypeReadReplica    AutopilotServerType = "read-replica"
	AutopilotTypeZoneVoter      AutopilotServerType = "zone-voter"
	AutopilotTypeZoneExtraVoter AutopilotServerType = "zone-extra-voter"
	AutopilotTypeZoneStandby    AutopilotServerType = "zone-standby"
)
165
// AutopilotZone is the value type of the RedundancyZones map in
// AutopilotState.
//
// NOTE(review): Servers and Voters presumably list server IDs belonging to the
// zone, and FailureTolerance presumably mirrors the cluster-level field of the
// same name scoped to one zone. None of this is shown in this file; confirm
// against the Consul API documentation.
type AutopilotZone struct {
	Servers          []string
	Voters           []string
	FailureTolerance int
}
171
// AutopilotZoneUpgradeVersions is the value type of the RedundancyZones map in
// AutopilotUpgrade.
//
// Every field is tagged omitempty and is therefore left out of the JSON
// encoding when the slice is empty.
//
// NOTE(review): the slices presumably hold server IDs split by whether the
// server runs the upgrade's target version and whether it is a voter. That is
// inferred from the field names only; confirm against the Consul API
// documentation.
type AutopilotZoneUpgradeVersions struct {
	TargetVersionVoters    []string `json:",omitempty"`
	TargetVersionNonVoters []string `json:",omitempty"`
	OtherVersionVoters     []string `json:",omitempty"`
	OtherVersionNonVoters  []string `json:",omitempty"`
}
178
// AutopilotUpgrade is the type referenced by the Upgrade field of
// AutopilotState.
//
// Status uses the AutopilotUpgradeStatus string type, whose known values are
// declared as constants in this file. Every other field is tagged omitempty
// and is left out of the JSON encoding when empty. RedundancyZones maps a
// string key to an AutopilotZoneUpgradeVersions value.
//
// NOTE(review): the string slices presumably hold server IDs and the
// RedundancyZones key is presumably a zone name. Neither is shown in this
// file; confirm against the Consul API documentation.
type AutopilotUpgrade struct {
	Status                    AutopilotUpgradeStatus
	TargetVersion             string                                  `json:",omitempty"`
	TargetVersionVoters       []string                                `json:",omitempty"`
	TargetVersionNonVoters    []string                                `json:",omitempty"`
	TargetVersionReadReplicas []string                                `json:",omitempty"`
	OtherVersionVoters        []string                                `json:",omitempty"`
	OtherVersionNonVoters     []string                                `json:",omitempty"`
	OtherVersionReadReplicas  []string                                `json:",omitempty"`
	RedundancyZones           map[string]AutopilotZoneUpgradeVersions `json:",omitempty"`
}
190
// AutopilotUpgradeStatus is the string type of the Status field of
// AutopilotUpgrade.
type AutopilotUpgradeStatus string

const (
	// AutopilotUpgradeIdle is the status when no upgrade is in progress.
	AutopilotUpgradeIdle AutopilotUpgradeStatus = "idle"

	// AutopilotUpgradeAwaitNewVoters is the status when more servers of
	// the target version must be added in order to start the promotion
	// phase of the upgrade.
	AutopilotUpgradeAwaitNewVoters AutopilotUpgradeStatus = "await-new-voters"

	// AutopilotUpgradePromoting is the status when autopilot is promoting
	// servers of the target version.
	AutopilotUpgradePromoting AutopilotUpgradeStatus = "promoting"

	// AutopilotUpgradeDemoting is the status when autopilot is demoting
	// servers not on the target version.
	AutopilotUpgradeDemoting AutopilotUpgradeStatus = "demoting"

	// AutopilotUpgradeLeaderTransfer is the status when autopilot is transferring
	// leadership from a server running an older version to a server
	// using the target version.
	AutopilotUpgradeLeaderTransfer AutopilotUpgradeStatus = "leader-transfer"

	// AutopilotUpgradeAwaitNewServers is the status when autopilot has finished
	// transferring leadership and has demoted all the other versioned
	// servers but wants to indicate that more target version servers
	// are needed to replace all the existing other version servers.
	AutopilotUpgradeAwaitNewServers AutopilotUpgradeStatus = "await-new-servers"

	// AutopilotUpgradeAwaitServerRemoval is the status when autopilot is waiting
	// for the servers on non-target versions to be removed.
	AutopilotUpgradeAwaitServerRemoval AutopilotUpgradeStatus = "await-server-removal"

	// AutopilotUpgradeDisabled is the status when automated upgrades are
	// disabled in the autopilot configuration.
	AutopilotUpgradeDisabled AutopilotUpgradeStatus = "disabled"
)
229
// ReadableDuration is a duration type that is serialized to JSON in human readable format.
//
// The JSON form is a quoted time.Duration string such as "1m30s". When
// decoding, a bare JSON number is also accepted and interpreted as a count of
// nanoseconds (any fractional part is truncated).
type ReadableDuration time.Duration

// NewReadableDuration returns a pointer to a new ReadableDuration holding dur.
func NewReadableDuration(dur time.Duration) *ReadableDuration {
	d := ReadableDuration(dur)
	return &d
}

// String formats the duration exactly as time.Duration.String does. A nil
// receiver is formatted as a zero duration.
func (d *ReadableDuration) String() string {
	return d.Duration().String()
}

// Duration converts d to a time.Duration. A nil receiver yields zero.
func (d *ReadableDuration) Duration() time.Duration {
	if d == nil {
		return time.Duration(0)
	}
	return time.Duration(*d)
}

// MarshalJSON encodes the duration as a JSON string containing the
// time.Duration.String representation, e.g. "1m30s". A nil receiver encodes
// as "0s". The returned error is always nil.
func (d *ReadableDuration) MarshalJSON() ([]byte, error) {
	return []byte(fmt.Sprintf(`"%s"`, d.Duration().String())), nil
}

// UnmarshalJSON decodes raw into d.
//
// Accepted inputs:
//   - a double-quoted string parsed with time.ParseDuration;
//   - an unquoted number, taken as nanoseconds;
//   - the literal null, which leaves d untouched (the no-op convention that
//     encoding/json documents for Unmarshaler implementations).
//
// An error is returned when d is nil, when the string or number cannot be
// parsed, or when the number does not fit in a time.Duration. On error d is
// left unchanged.
func (d *ReadableDuration) UnmarshalJSON(raw []byte) error {
	if d == nil {
		return fmt.Errorf("cannot unmarshal to nil pointer")
	}

	str := string(raw)
	if str == "null" {
		// encoding/json hands null to the Unmarshaler for non-pointer fields;
		// by convention that must not be an error.
		return nil
	}

	var dur time.Duration
	if len(str) >= 2 && str[0] == '"' && str[len(str)-1] == '"' {
		// quoted string
		parsed, err := time.ParseDuration(str[1 : len(str)-1])
		if err != nil {
			return err
		}
		dur = parsed
	} else {
		// no quotes, not a string
		v, err := strconv.ParseFloat(str, 64)
		if err != nil {
			return err
		}

		// Converting a float64 that int64 cannot represent is
		// implementation-dependent in Go, so reject it instead of storing
		// garbage. The check is phrased positively so that NaN, for which
		// every comparison is false, is rejected as well.
		const limit = 1 << 63 // first magnitude beyond the int64 range
		if !(v >= -limit && v < limit) {
			return fmt.Errorf("duration %q is out of range", str)
		}
		dur = time.Duration(v)
	}

	*d = ReadableDuration(dur)
	return nil
}
278
279// AutopilotGetConfiguration is used to query the current Autopilot configuration.
280func (op *Operator) AutopilotGetConfiguration(q *QueryOptions) (*AutopilotConfiguration, error) {
281	r := op.c.newRequest("GET", "/v1/operator/autopilot/configuration")
282	r.setQueryOptions(q)
283	_, resp, err := requireOK(op.c.doRequest(r))
284	if err != nil {
285		return nil, err
286	}
287	defer closeResponseBody(resp)
288
289	var out AutopilotConfiguration
290	if err := decodeBody(resp, &out); err != nil {
291		return nil, err
292	}
293
294	return &out, nil
295}
296
297// AutopilotSetConfiguration is used to set the current Autopilot configuration.
298func (op *Operator) AutopilotSetConfiguration(conf *AutopilotConfiguration, q *WriteOptions) error {
299	r := op.c.newRequest("PUT", "/v1/operator/autopilot/configuration")
300	r.setWriteOptions(q)
301	r.obj = conf
302	_, resp, err := requireOK(op.c.doRequest(r))
303	if err != nil {
304		return err
305	}
306	closeResponseBody(resp)
307	return nil
308}
309
310// AutopilotCASConfiguration is used to perform a Check-And-Set update on the
311// Autopilot configuration. The ModifyIndex value will be respected. Returns
312// true on success or false on failures.
313func (op *Operator) AutopilotCASConfiguration(conf *AutopilotConfiguration, q *WriteOptions) (bool, error) {
314	r := op.c.newRequest("PUT", "/v1/operator/autopilot/configuration")
315	r.setWriteOptions(q)
316	r.params.Set("cas", strconv.FormatUint(conf.ModifyIndex, 10))
317	r.obj = conf
318	_, resp, err := requireOK(op.c.doRequest(r))
319	if err != nil {
320		return false, err
321	}
322	defer closeResponseBody(resp)
323
324	var buf bytes.Buffer
325	if _, err := io.Copy(&buf, resp.Body); err != nil {
326		return false, fmt.Errorf("Failed to read response: %v", err)
327	}
328	res := strings.Contains(buf.String(), "true")
329
330	return res, nil
331}
332
333// AutopilotServerHealth
334func (op *Operator) AutopilotServerHealth(q *QueryOptions) (*OperatorHealthReply, error) {
335	r := op.c.newRequest("GET", "/v1/operator/autopilot/health")
336	r.setQueryOptions(q)
337
338	// we cannot just use requireOK because this endpoint might use a 429 status to indicate
339	// that unhealthiness
340	_, resp, err := op.c.doRequest(r)
341	if err != nil {
342		if resp != nil {
343			closeResponseBody(resp)
344		}
345		return nil, err
346	}
347
348	// these are the only 2 status codes that would indicate that we should
349	// expect the body to contain the right format.
350	if resp.StatusCode != 200 && resp.StatusCode != 429 {
351		return nil, generateUnexpectedResponseCodeError(resp)
352	}
353
354	defer closeResponseBody(resp)
355
356	var out OperatorHealthReply
357	if err := decodeBody(resp, &out); err != nil {
358		return nil, err
359	}
360	return &out, nil
361}
362
363func (op *Operator) AutopilotState(q *QueryOptions) (*AutopilotState, error) {
364	r := op.c.newRequest("GET", "/v1/operator/autopilot/state")
365	r.setQueryOptions(q)
366	_, resp, err := requireOK(op.c.doRequest(r))
367	if err != nil {
368		return nil, err
369	}
370	defer closeResponseBody(resp)
371
372	var out AutopilotState
373	if err := decodeBody(resp, &out); err != nil {
374		return nil, err
375	}
376
377	return &out, nil
378}
379