// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package etcdserver

import (
	"context"
	"fmt"
	"path/filepath"
	"sort"
	"strings"
	"time"

	"go.etcd.io/etcd/pkg/netutil"
	"go.etcd.io/etcd/pkg/transport"
	"go.etcd.io/etcd/pkg/types"

	bolt "go.etcd.io/bbolt"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

// ServerConfig holds the configuration of etcd as taken from the command line or discovery.
type ServerConfig struct {
	// Name is the local member's name. It is the key looked up in
	// InitialPeerURLsMap when the configuration is verified.
	Name string
	// DiscoveryURL, when non-empty, makes ShouldDiscover report true.
	DiscoveryURL   string
	DiscoveryProxy string
	ClientURLs     types.URLs
	// PeerURLs are compared against InitialPeerURLsMap[Name] during
	// bootstrap verification; error messages refer to them as
	// --initial-advertise-peer-urls.
	PeerURLs types.URLs
	// DataDir is the base directory from which MemberDir, SnapDir and the
	// default WALDir are derived.
	DataDir string
	// DedicatedWALDir, when set, makes etcd write the WAL to that directory
	// instead of dataDir/member/wal.
	DedicatedWALDir string

	SnapshotCount uint64

	// SnapshotCatchUpEntries is the number of entries kept for a slow
	// follower to catch up after the raft storage entries are compacted.
	// We expect the follower to have millisecond-level latency with the
	// leader. The max throughput is around 10K, so keeping 5K entries is
	// enough to help a follower catch up.
	// WARNING: only change this for tests. Always use "DefaultSnapshotCatchUpEntries"
	SnapshotCatchUpEntries uint64

	MaxSnapFiles uint
	MaxWALFiles  uint

	// BackendBatchInterval is the maximum time before the backend
	// transaction is committed.
	BackendBatchInterval time.Duration
	// BackendBatchLimit is the maximum number of operations before the
	// backend transaction is committed.
	BackendBatchLimit int

	// BackendFreelistType is the type of the backend boltdb freelist.
	BackendFreelistType bolt.FreelistType

	InitialPeerURLsMap  types.URLsMap
	InitialClusterToken string
	NewCluster          bool
	PeerTLSInfo         transport.TLSInfo

	CORS map[string]struct{}

	// HostWhitelist lists acceptable hostnames from client requests.
	// If the server is insecure (no TLS), it only accepts requests
	// whose Host header value exists in this whitelist.
	HostWhitelist map[string]struct{}

	// TickMs and ElectionTicks together define the election timeout,
	// which is ElectionTicks * TickMs milliseconds.
	TickMs        uint
	ElectionTicks int

	// InitialElectionTickAdvance, if true, makes the local member
	// fast-forward election ticks to speed up the "initial" leader election
	// trigger. This benefits the case of larger election ticks. For
	// instance, a cross datacenter deployment may require a longer election
	// timeout of 10 seconds. If true, the local node does not need to wait
	// up to 10 seconds. Instead, it forwards its election ticks to 8 seconds
	// and has only 2 seconds left before leader election.
	//
	// Major assumptions are that:
	//  - the cluster has no active leader, thus advancing ticks enables
	//    faster leader election, or
	//  - the cluster already has an established leader, and a rejoining
	//    follower is likely to receive heartbeats from the leader after the
	//    tick advance and before the election timeout.
	//
	// However, when the network from the leader to the rejoining follower is
	// congested, and the follower does not receive a leader heartbeat within
	// the remaining election ticks, a disruptive election has to happen,
	// thus affecting cluster availability.
	//
	// Disabling this would slow down the initial bootstrap process for cross
	// datacenter deployments. Make your own tradeoffs by configuring
	// --initial-election-tick-advance at the cost of slow initial bootstrap.
	//
	// If single-node, it advances ticks regardless.
	//
	// See https://github.com/etcd-io/etcd/issues/9333 for more detail.
	InitialElectionTickAdvance bool

	// BootstrapTimeout overrides the bootstrap timeout; when it is zero,
	// bootstrapTimeout falls back to one second.
	BootstrapTimeout time.Duration

	AutoCompactionRetention time.Duration
	AutoCompactionMode      string
	CompactionBatchLimit    int
	QuotaBackendBytes       int64
	MaxTxnOps               uint

	// MaxRequestBytes is the maximum request size to send over raft.
	MaxRequestBytes uint

	StrictReconfigCheck bool

	// ClientCertAuthEnabled is true when cert has been signed by the client CA.
	ClientCertAuthEnabled bool

	AuthToken  string
	BcryptCost uint

	// InitialCorruptCheck is true to check data corruption on boot
	// before serving any peer/client traffic.
	InitialCorruptCheck bool
	CorruptCheckTime    time.Duration

	// PreVote is true to enable Raft Pre-Vote.
	PreVote bool

	// Logger logs server-side operations.
	// If not nil, it disables "capnslog" and uses the given logger.
	Logger *zap.Logger

	// LoggerConfig is the server logger configuration for the Raft logger.
	// Must be either: "LoggerConfig != nil" or "LoggerCore != nil && LoggerWriteSyncer != nil".
	LoggerConfig *zap.Config
	// LoggerCore is the "zapcore.Core" for the Raft logger.
	// Must be either: "LoggerConfig != nil" or "LoggerCore != nil && LoggerWriteSyncer != nil".
	LoggerCore        zapcore.Core
	LoggerWriteSyncer zapcore.WriteSyncer

	ForceNewCluster bool

	// EnableLeaseCheckpoint enables the primary lessor to persist lease
	// remainingTTL to prevent indefinite auto-renewal of long-lived leases.
	EnableLeaseCheckpoint bool
	// LeaseCheckpointInterval is the wait duration between lease checkpoints.
	LeaseCheckpointInterval time.Duration

	EnableGRPCGateway bool
}

// VerifyBootstrap sanity-checks the initial config for bootstrap case
// and returns an error for things that should never happen.
162func (c *ServerConfig) VerifyBootstrap() error { 163 if err := c.hasLocalMember(); err != nil { 164 return err 165 } 166 if err := c.advertiseMatchesCluster(); err != nil { 167 return err 168 } 169 if checkDuplicateURL(c.InitialPeerURLsMap) { 170 return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap) 171 } 172 if c.InitialPeerURLsMap.String() == "" && c.DiscoveryURL == "" { 173 return fmt.Errorf("initial cluster unset and no discovery URL found") 174 } 175 return nil 176} 177 178// VerifyJoinExisting sanity-checks the initial config for join existing cluster 179// case and returns an error for things that should never happen. 180func (c *ServerConfig) VerifyJoinExisting() error { 181 // The member has announced its peer urls to the cluster before starting; no need to 182 // set the configuration again. 183 if err := c.hasLocalMember(); err != nil { 184 return err 185 } 186 if checkDuplicateURL(c.InitialPeerURLsMap) { 187 return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap) 188 } 189 if c.DiscoveryURL != "" { 190 return fmt.Errorf("discovery URL should not be set when joining existing initial cluster") 191 } 192 return nil 193} 194 195// hasLocalMember checks that the cluster at least contains the local server. 196func (c *ServerConfig) hasLocalMember() error { 197 if urls := c.InitialPeerURLsMap[c.Name]; urls == nil { 198 return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name) 199 } 200 return nil 201} 202 203// advertiseMatchesCluster confirms peer URLs match those in the cluster peer list. 
204func (c *ServerConfig) advertiseMatchesCluster() error { 205 urls, apurls := c.InitialPeerURLsMap[c.Name], c.PeerURLs.StringSlice() 206 urls.Sort() 207 sort.Strings(apurls) 208 ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second) 209 defer cancel() 210 ok, err := netutil.URLStringsEqual(ctx, c.Logger, apurls, urls.StringSlice()) 211 if ok { 212 return nil 213 } 214 215 initMap, apMap := make(map[string]struct{}), make(map[string]struct{}) 216 for _, url := range c.PeerURLs { 217 apMap[url.String()] = struct{}{} 218 } 219 for _, url := range c.InitialPeerURLsMap[c.Name] { 220 initMap[url.String()] = struct{}{} 221 } 222 223 missing := []string{} 224 for url := range initMap { 225 if _, ok := apMap[url]; !ok { 226 missing = append(missing, url) 227 } 228 } 229 if len(missing) > 0 { 230 for i := range missing { 231 missing[i] = c.Name + "=" + missing[i] 232 } 233 mstr := strings.Join(missing, ",") 234 apStr := strings.Join(apurls, ",") 235 return fmt.Errorf("--initial-cluster has %s but missing from --initial-advertise-peer-urls=%s (%v)", mstr, apStr, err) 236 } 237 238 for url := range apMap { 239 if _, ok := initMap[url]; !ok { 240 missing = append(missing, url) 241 } 242 } 243 if len(missing) > 0 { 244 mstr := strings.Join(missing, ",") 245 umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs}) 246 return fmt.Errorf("--initial-advertise-peer-urls has %s but missing from --initial-cluster=%s", mstr, umap.String()) 247 } 248 249 // resolved URLs from "--initial-advertise-peer-urls" and "--initial-cluster" did not match or failed 250 apStr := strings.Join(apurls, ",") 251 umap := types.URLsMap(map[string]types.URLs{c.Name: c.PeerURLs}) 252 return fmt.Errorf("failed to resolve %s to match --initial-cluster=%s (%v)", apStr, umap.String(), err) 253} 254 255func (c *ServerConfig) MemberDir() string { return filepath.Join(c.DataDir, "member") } 256 257func (c *ServerConfig) WALDir() string { 258 if c.DedicatedWALDir != "" { 259 return 
c.DedicatedWALDir 260 } 261 return filepath.Join(c.MemberDir(), "wal") 262} 263 264func (c *ServerConfig) SnapDir() string { return filepath.Join(c.MemberDir(), "snap") } 265 266func (c *ServerConfig) ShouldDiscover() bool { return c.DiscoveryURL != "" } 267 268// ReqTimeout returns timeout for request to finish. 269func (c *ServerConfig) ReqTimeout() time.Duration { 270 // 5s for queue waiting, computation and disk IO delay 271 // + 2 * election timeout for possible leader election 272 return 5*time.Second + 2*time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond 273} 274 275func (c *ServerConfig) electionTimeout() time.Duration { 276 return time.Duration(c.ElectionTicks*int(c.TickMs)) * time.Millisecond 277} 278 279func (c *ServerConfig) peerDialTimeout() time.Duration { 280 // 1s for queue wait and election timeout 281 return time.Second + time.Duration(c.ElectionTicks*int(c.TickMs))*time.Millisecond 282} 283 284func checkDuplicateURL(urlsmap types.URLsMap) bool { 285 um := make(map[string]bool) 286 for _, urls := range urlsmap { 287 for _, url := range urls { 288 u := url.String() 289 if um[u] { 290 return true 291 } 292 um[u] = true 293 } 294 } 295 return false 296} 297 298func (c *ServerConfig) bootstrapTimeout() time.Duration { 299 if c.BootstrapTimeout != 0 { 300 return c.BootstrapTimeout 301 } 302 return time.Second 303} 304 305func (c *ServerConfig) backendPath() string { return filepath.Join(c.SnapDir(), "db") } 306