1// Copyright 2015 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package etcdserver
16
17import (
18	"context"
19	"fmt"
20	"path/filepath"
21	"sort"
22	"strings"
23	"time"
24
25	"go.etcd.io/etcd/pkg/netutil"
26	"go.etcd.io/etcd/pkg/transport"
27	"go.etcd.io/etcd/pkg/types"
28
29	bolt "go.etcd.io/bbolt"
30	"go.uber.org/zap"
31	"go.uber.org/zap/zapcore"
32)
33
// ServerConfig holds the configuration of etcd as taken from the command line or discovery.
type ServerConfig struct {
	// Name is the local member's name; it is the key under which this member's
	// peer URLs are looked up in InitialPeerURLsMap.
	//
	// DiscoveryURL, when non-empty, makes ShouldDiscover report true. It must be
	// empty when joining an existing cluster (see VerifyJoinExisting).
	//
	// PeerURLs are the advertised peer URLs ("--initial-advertise-peer-urls" in
	// error messages); VerifyBootstrap requires them to match the local
	// member's entry in InitialPeerURLsMap.
	//
	// DataDir is the directory that contains the "member" directory (see MemberDir).
	Name           string
	DiscoveryURL   string
	DiscoveryProxy string
	ClientURLs     types.URLs
	PeerURLs       types.URLs
	DataDir        string
	// DedicatedWALDir, when non-empty, makes etcd write the WAL to this
	// directory rather than to dataDir/member/wal (see WALDir).
	DedicatedWALDir string

	SnapshotCount uint64

	// SnapshotCatchUpEntries is the number of entries for a slow follower
	// to catch up after compacting the raft storage entries.
	// We expect the follower to have millisecond-level latency with the leader.
	// The max throughput is around 10K. Keeping 5K entries is enough to help
	// a follower catch up.
	// WARNING: only change this for tests. Always use "DefaultSnapshotCatchUpEntries"
	SnapshotCatchUpEntries uint64

	MaxSnapFiles uint
	MaxWALFiles  uint

	// BackendBatchInterval is the maximum time before committing the backend transaction.
	BackendBatchInterval time.Duration
	// BackendBatchLimit is the maximum number of operations before committing the backend transaction.
	BackendBatchLimit int

	// BackendFreelistType is the type of the backend boltdb freelist.
	BackendFreelistType bolt.FreelistType

	// InitialPeerURLsMap maps each member name to its peer URLs
	// ("--initial-cluster" in error messages). The bootstrap and join checks
	// require it to contain an entry for Name and to hold no duplicate URLs.
	InitialPeerURLsMap  types.URLsMap
	InitialClusterToken string
	NewCluster          bool
	PeerTLSInfo         transport.TLSInfo

	CORS map[string]struct{}

	// HostWhitelist lists acceptable hostnames from client requests.
	// If the server is insecure (no TLS), the server only accepts requests
	// whose Host header value exists in this white list.
	HostWhitelist map[string]struct{}

	// TickMs and ElectionTicks together define the election timeout, which is
	// ElectionTicks * TickMs milliseconds (see electionTimeout).
	TickMs        uint
	ElectionTicks int

	// If InitialElectionTickAdvance is true, the local member fast-forwards
	// election ticks to speed up the "initial" leader election trigger. This
	// benefits the case of larger election ticks. For instance, a cross
	// datacenter deployment may require a longer election timeout of 10 seconds.
	// If true, the local node does not need to wait up to 10 seconds. Instead,
	// it forwards its election ticks to 8 seconds, and has only 2 seconds left
	// before leader election.
	//
	// Major assumptions are that:
	//  - cluster has no active leader thus advancing ticks enables faster
	//    leader election, or
	//  - cluster already has an established leader, and rejoining follower
	//    is likely to receive heartbeats from the leader after tick advance
	//    and before election timeout.
	//
	// However, when the network from the leader to a rejoining follower is
	// congested, and the follower does not receive a leader heartbeat within
	// the remaining election ticks, a disruptive election has to happen, thus
	// affecting cluster availability.
	//
	// Disabling this would slow down the initial bootstrap process for cross
	// datacenter deployments. Make your own tradeoffs by configuring
	// --initial-election-tick-advance at the cost of slow initial bootstrap.
	//
	// If single-node, it advances ticks regardless.
	//
	// See https://github.com/etcd-io/etcd/issues/9333 for more detail.
	InitialElectionTickAdvance bool

	// BootstrapTimeout, when zero, is replaced by a one-second default
	// (see bootstrapTimeout).
	BootstrapTimeout time.Duration

	AutoCompactionRetention time.Duration
	AutoCompactionMode      string
	CompactionBatchLimit    int
	QuotaBackendBytes       int64
	MaxTxnOps               uint

	// MaxRequestBytes is the maximum request size to send over raft.
	MaxRequestBytes uint

	StrictReconfigCheck bool

	// ClientCertAuthEnabled is true when cert has been signed by the client CA.
	ClientCertAuthEnabled bool

	AuthToken  string
	BcryptCost uint

	// InitialCorruptCheck is true to check data corruption on boot
	// before serving any peer/client traffic.
	InitialCorruptCheck bool
	CorruptCheckTime    time.Duration

	// PreVote is true to enable Raft Pre-Vote.
	PreVote bool

	// Logger logs server-side operations.
	// If not nil, it disables "capnslog" and uses the given logger.
	Logger *zap.Logger

	// LoggerConfig is the server logger configuration for the Raft logger.
	// Must be either: "LoggerConfig != nil" or "LoggerCore != nil && LoggerWriteSyncer != nil".
	LoggerConfig *zap.Config
	// LoggerCore is the "zapcore.Core" for the Raft logger.
	// Must be either: "LoggerConfig != nil" or "LoggerCore != nil && LoggerWriteSyncer != nil".
	LoggerCore        zapcore.Core
	LoggerWriteSyncer zapcore.WriteSyncer

	ForceNewCluster bool

	// EnableLeaseCheckpoint enables the primary lessor to persist lease remainingTTL
	// to prevent indefinite auto-renewal of long-lived leases.
	EnableLeaseCheckpoint bool
	// LeaseCheckpointInterval is the wait duration between lease checkpoints.
	LeaseCheckpointInterval time.Duration

	EnableGRPCGateway bool
}
159
160// VerifyBootstrap sanity-checks the initial config for bootstrap case
161// and returns an error for things that should never happen.
162func (c *ServerConfig) VerifyBootstrap() error {
163	if err := c.hasLocalMember(); err != nil {
164		return err
165	}
166	if err := c.advertiseMatchesCluster(); err != nil {
167		return err
168	}
169	if checkDuplicateURL(c.InitialPeerURLsMap) {
170		return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
171	}
172	if c.InitialPeerURLsMap.String() == "" && c.DiscoveryURL == "" {
173		return fmt.Errorf("initial cluster unset and no discovery URL found")
174	}
175	return nil
176}
177
178// VerifyJoinExisting sanity-checks the initial config for join existing cluster
179// case and returns an error for things that should never happen.
180func (c *ServerConfig) VerifyJoinExisting() error {
181	// The member has announced its peer urls to the cluster before starting; no need to
182	// set the configuration again.
183	if err := c.hasLocalMember(); err != nil {
184		return err
185	}
186	if checkDuplicateURL(c.InitialPeerURLsMap) {
187		return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
188	}
189	if c.DiscoveryURL != "" {
190		return fmt.Errorf("discovery URL should not be set when joining existing initial cluster")
191	}
192	return nil
193}
194
195// hasLocalMember checks that the cluster at least contains the local server.
196func (c *ServerConfig) hasLocalMember() error {
197	if urls := c.InitialPeerURLsMap[c.Name]; urls == nil {
198		return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name)
199	}
200	return nil
201}
202
203// advertiseMatchesCluster confirms peer URLs match those in the cluster peer list.
204func (c *ServerConfig) advertiseMatchesCluster() error {
205	urls, apurls := c.InitialPeerURLsMap[c.Name], c.PeerURLs.StringSlice()
206	urls.Sort()
207	sort.Strings(apurls)
208	ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
209	defer cancel()
210	ok, err := netutil.URLStringsEqual(ctx, c.Logger, apurls, urls.StringSlice())
211	if ok {
212		return nil
213	}
214
215	initMap, apMap := make(map[string]struct{}), make(map[string]struct{})
216	for _, url := range c.PeerURLs {
217		apMap[url.String()] = struct{}{}
218	}
219	for _, url := range c.InitialPeerURLsMap[c.Name] {
220		initMap[url.String()] = struct{}{}
221	}
222
223	missing := []string{}
224	for url := range initMap {
225		if _, ok := apMap[url]; !ok {
226			missing = append(missing, url)
227		}
228	}
229	if len(missing) > 0 {
230		for i := range missing {
231			missing[i] = c.Name + "=" + missing[i]
232		}
233		mstr := strings.Join(missing, ",")
234		apStr := strings.Join(apurls, ",")
235		return fmt.Errorf("--initial-cluster has %s but missing from --initial-advertise-peer-urls=%s (%v)", mstr, apStr, err)
236	}
237
238	for url := range apMap {
239		if _, ok := initMap[url]; !ok {
240			missing = append(missing, url)
241		}
242	}
243	if len(missing) > 0 {
244		mstr := strings.Join(missing, ",")
245		umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs})
246		return fmt.Errorf("--initial-advertise-peer-urls has %s but missing from --initial-cluster=%s", mstr, umap.String())
247	}
248
249	// resolved URLs from "--initial-advertise-peer-urls" and "--initial-cluster" did not match or failed
250	apStr := strings.Join(apurls, ",")
251	umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs})
252	return fmt.Errorf("failed to resolve %s to match --initial-cluster=%s (%v)", apStr, umap.String(), err)
253}
254
255func (c *ServerConfig) MemberDir() string { return filepath.Join(c.DataDir, "member") }
256
257func (c *ServerConfig) WALDir() string {
258	if c.DedicatedWALDir != "" {
259		return c.DedicatedWALDir
260	}
261	return filepath.Join(c.MemberDir(), "wal")
262}
263
264func (c *ServerConfig) SnapDir() string { return filepath.Join(c.MemberDir(), "snap") }
265
266func (c *ServerConfig) ShouldDiscover() bool { return c.DiscoveryURL != "" }
267
268// ReqTimeout returns timeout for request to finish.
269func (c *ServerConfig) ReqTimeout() time.Duration {
270	// 5s for queue waiting, computation and disk IO delay
271	// + 2 * election timeout for possible leader election
272	return 5*time.Second + 2*time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond
273}
274
275func (c *ServerConfig) electionTimeout() time.Duration {
276	return time.Duration(c.ElectionTicks*int(c.TickMs)) * time.Millisecond
277}
278
279func (c *ServerConfig) peerDialTimeout() time.Duration {
280	// 1s for queue wait and election timeout
281	return time.Second + time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond
282}
283
284func checkDuplicateURL(urlsmap types.URLsMap) bool {
285	um := make(map[string]bool)
286	for _, urls := range urlsmap {
287		for _, url := range urls {
288			u := url.String()
289			if um[u] {
290				return true
291			}
292			um[u] = true
293		}
294	}
295	return false
296}
297
298func (c *ServerConfig) bootstrapTimeout() time.Duration {
299	if c.BootstrapTimeout != 0 {
300		return c.BootstrapTimeout
301	}
302	return time.Second
303}
304
305func (c *ServerConfig) backendPath() string { return filepath.Join(c.SnapDir(), "db") }
306