1// Copyright 2016 The etcd Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package embed
16
17import (
18	"context"
19	"crypto/tls"
20	"fmt"
21	"io/ioutil"
22	defaultLog "log"
23	"net"
24	"net/http"
25	"net/url"
26	"runtime"
27	"sort"
28	"strconv"
29	"sync"
30	"time"
31
32	"go.etcd.io/etcd/etcdserver"
33	"go.etcd.io/etcd/etcdserver/api/etcdhttp"
34	"go.etcd.io/etcd/etcdserver/api/rafthttp"
35	"go.etcd.io/etcd/etcdserver/api/v2http"
36	"go.etcd.io/etcd/etcdserver/api/v2v3"
37	"go.etcd.io/etcd/etcdserver/api/v3client"
38	"go.etcd.io/etcd/etcdserver/api/v3rpc"
39	"go.etcd.io/etcd/pkg/debugutil"
40	runtimeutil "go.etcd.io/etcd/pkg/runtime"
41	"go.etcd.io/etcd/pkg/transport"
42	"go.etcd.io/etcd/pkg/types"
43	"go.etcd.io/etcd/version"
44
45	"github.com/coreos/pkg/capnslog"
46	grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus"
47	"github.com/soheilhy/cmux"
48	"go.uber.org/zap"
49	"google.golang.org/grpc"
50	"google.golang.org/grpc/keepalive"
51)
52
// plog is the legacy capnslog logger, used only on code paths where no
// zap logger has been configured (cfg.logger == nil).
var plog = capnslog.NewPackageLogger("go.etcd.io/etcd", "embed")

const (
	// internal fd usage includes disk usage and transport usage.
	// To read/write snapshot, snap pkg needs 1. In normal case, wal pkg needs
	// at most 2 to read/lock/write WALs. One case that it needs to 2 is to
	// read all logs after some snapshot index, which locates at the end of
	// the second last and the head of the last. For purging, it needs to read
	// directory, so it needs 1. For fd monitor, it needs 1.
	// For transport, rafthttp builds two long-polling connections and at most
	// four temporary connections with each member. There are at most 9 members
	// in a cluster, so it should reserve 96.
	// For the safety, we set the total reserved number to 150.
	reservedInternalFDNum = 150
)
68
// Etcd contains a running etcd server and its listeners.
type Etcd struct {
	// Peers are the listeners accepting peer (raft) traffic.
	Peers []*peerListener
	// Clients are the listeners accepting client (v2/v3 API) traffic.
	Clients []net.Listener
	// a map of contexts for the servers that serves client requests.
	sctxs map[string]*serveCtx
	// metricsListeners serve the optional dedicated metrics endpoints.
	metricsListeners []net.Listener

	// Server is the embedded etcd server; it is not guaranteed to have
	// joined the cluster until Server.ReadyNotify() fires (see StartEtcd).
	Server *etcdserver.EtcdServer

	cfg   Config        // copy of the configuration passed to StartEtcd
	stopc chan struct{} // closed exactly once by Close to signal shutdown
	errc  chan error    // buffered; receives fatal serve errors (see Err)

	// closeOnce guards the close(stopc) in Close so that Close is idempotent.
	closeOnce sync.Once
}
85
// peerListener couples a peer-facing net.Listener with the function that
// serves it and the function that shuts it down.
type peerListener struct {
	net.Listener
	// serve blocks, serving peer traffic on the listener (set in servePeers).
	serve func() error
	// close shuts the listener down. Before serving starts it just closes
	// the raw listener; once serving, it is overwritten to gracefully stop
	// the HTTP/gRPC servers within the given context (see servePeers).
	close func(context.Context) error
}
91
// StartEtcd launches the etcd server and HTTP handlers for client/server communication.
// The returned Etcd.Server is not guaranteed to have joined the cluster. Wait
// on the Etcd.Server.ReadyNotify() channel to know when it completes and is ready for use.
func StartEtcd(inCfg *Config) (e *Etcd, err error) {
	if err = inCfg.Validate(); err != nil {
		return nil, err
	}
	serving := false
	e = &Etcd{cfg: *inCfg, stopc: make(chan struct{})}
	cfg := &e.cfg
	// On any error below, tear down whatever has been started so far.
	defer func() {
		if e == nil || err == nil {
			return
		}
		if !serving {
			// errored before starting gRPC server for serveCtx.serversC
			for _, sctx := range e.sctxs {
				close(sctx.serversC)
			}
		}
		e.Close()
		e = nil
	}()

	if e.cfg.logger != nil {
		e.cfg.logger.Info(
			"configuring peer listeners",
			zap.Strings("listen-peer-urls", e.cfg.getLPURLs()),
		)
	}
	if e.Peers, err = configurePeerListeners(cfg); err != nil {
		return e, err
	}

	if e.cfg.logger != nil {
		e.cfg.logger.Info(
			"configuring client listeners",
			zap.Strings("listen-client-urls", e.cfg.getLCURLs()),
		)
	}
	if e.sctxs, err = configureClientListeners(cfg); err != nil {
		return e, err
	}

	for _, sctx := range e.sctxs {
		e.Clients = append(e.Clients, sctx.l)
	}

	var (
		urlsmap types.URLsMap
		token   string
	)
	// Only a brand-new member needs the initial cluster map/token; an
	// already-initialized member recovers membership from its data dir.
	memberInitialized := true
	if !isMemberInitialized(cfg) {
		memberInitialized = false
		urlsmap, token, err = cfg.PeerURLsMapAndToken("etcd")
		if err != nil {
			return e, fmt.Errorf("error setting up initial cluster: %v", err)
		}
	}

	// AutoCompactionRetention defaults to "0" if not set.
	if len(cfg.AutoCompactionRetention) == 0 {
		cfg.AutoCompactionRetention = "0"
	}
	autoCompactionRetention, err := parseCompactionRetention(cfg.AutoCompactionMode, cfg.AutoCompactionRetention)
	if err != nil {
		return e, err
	}

	backendFreelistType := parseBackendFreelistType(cfg.ExperimentalBackendFreelistType)

	srvcfg := etcdserver.ServerConfig{
		Name:                        cfg.Name,
		ClientURLs:                  cfg.ACUrls,
		PeerURLs:                    cfg.APUrls,
		DataDir:                     cfg.Dir,
		DedicatedWALDir:             cfg.WalDir,
		SnapshotCount:               cfg.SnapshotCount,
		SnapshotCatchUpEntries:      cfg.SnapshotCatchUpEntries,
		MaxSnapFiles:                cfg.MaxSnapFiles,
		MaxWALFiles:                 cfg.MaxWalFiles,
		InitialPeerURLsMap:          urlsmap,
		InitialClusterToken:         token,
		DiscoveryURL:                cfg.Durl,
		DiscoveryProxy:              cfg.Dproxy,
		NewCluster:                  cfg.IsNewCluster(),
		PeerTLSInfo:                 cfg.PeerTLSInfo,
		TickMs:                      cfg.TickMs,
		ElectionTicks:               cfg.ElectionTicks(),
		InitialElectionTickAdvance:  cfg.InitialElectionTickAdvance,
		AutoCompactionRetention:     autoCompactionRetention,
		AutoCompactionMode:          cfg.AutoCompactionMode,
		QuotaBackendBytes:           cfg.QuotaBackendBytes,
		BackendBatchLimit:           cfg.BackendBatchLimit,
		BackendFreelistType:         backendFreelistType,
		BackendBatchInterval:        cfg.BackendBatchInterval,
		MaxTxnOps:                   cfg.MaxTxnOps,
		MaxRequestBytes:             cfg.MaxRequestBytes,
		StrictReconfigCheck:         cfg.StrictReconfigCheck,
		ClientCertAuthEnabled:       cfg.ClientTLSInfo.ClientCertAuth,
		AuthToken:                   cfg.AuthToken,
		BcryptCost:                  cfg.BcryptCost,
		TokenTTL:                    cfg.AuthTokenTTL,
		CORS:                        cfg.CORS,
		HostWhitelist:               cfg.HostWhitelist,
		InitialCorruptCheck:         cfg.ExperimentalInitialCorruptCheck,
		CorruptCheckTime:            cfg.ExperimentalCorruptCheckTime,
		PreVote:                     cfg.PreVote,
		Logger:                      cfg.logger,
		LoggerConfig:                cfg.loggerConfig,
		LoggerCore:                  cfg.loggerCore,
		LoggerWriteSyncer:           cfg.loggerWriteSyncer,
		Debug:                       cfg.Debug,
		ForceNewCluster:             cfg.ForceNewCluster,
		EnableGRPCGateway:           cfg.EnableGRPCGateway,
		UnsafeNoFsync:               cfg.UnsafeNoFsync,
		EnableLeaseCheckpoint:       cfg.ExperimentalEnableLeaseCheckpoint,
		CompactionBatchLimit:        cfg.ExperimentalCompactionBatchLimit,
		WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval,
		WarningApplyDuration:        cfg.ExperimentalWarningApplyDuration,
	}
	print(e.cfg.logger, *cfg, srvcfg, memberInitialized)
	if e.Server, err = etcdserver.NewServer(srvcfg); err != nil {
		return e, err
	}

	// buffer channel so goroutines on closed connections won't wait forever
	e.errc = make(chan error, len(e.Peers)+len(e.Clients)+2*len(e.sctxs))

	// newly started member ("memberInitialized==false")
	// does not need corruption check
	if memberInitialized {
		if err = e.Server.CheckInitialHashKV(); err != nil {
			// set "EtcdServer" to nil, so that it does not block on "EtcdServer.Close()"
			// (nothing to close since rafthttp transports have not been started)
			e.Server = nil
			return e, err
		}
	}
	e.Server.Start()

	// Serving order matters: peers first (raft), then clients, then metrics.
	if err = e.servePeers(); err != nil {
		return e, err
	}
	if err = e.serveClients(); err != nil {
		return e, err
	}
	if err = e.serveMetrics(); err != nil {
		return e, err
	}

	if e.cfg.logger != nil {
		e.cfg.logger.Info(
			"now serving peer/client/metrics",
			zap.String("local-member-id", e.Server.ID().String()),
			zap.Strings("initial-advertise-peer-urls", e.cfg.getAPURLs()),
			zap.Strings("listen-peer-urls", e.cfg.getLPURLs()),
			zap.Strings("advertise-client-urls", e.cfg.getACURLs()),
			zap.Strings("listen-client-urls", e.cfg.getLCURLs()),
			zap.Strings("listen-metrics-urls", e.cfg.getMetricsURLs()),
		)
	}
	// From here on, the cleanup defer must not close serveCtx.serversC.
	serving = true
	return e, nil
}
258
// print logs the effective server configuration at startup, using the
// legacy capnslog logger when lg is nil and structured zap logging
// otherwise.
func print(lg *zap.Logger, ec Config, sc etcdserver.ServerConfig, memberInitialized bool) {
	// TODO: remove this after dropping "capnslog"
	if lg == nil {
		plog.Infof("name = %s", ec.Name)
		if sc.ForceNewCluster {
			plog.Infof("force new cluster")
		}
		plog.Infof("data dir = %s", sc.DataDir)
		plog.Infof("member dir = %s", sc.MemberDir())
		if sc.DedicatedWALDir != "" {
			plog.Infof("dedicated WAL dir = %s", sc.DedicatedWALDir)
		}
		plog.Infof("heartbeat = %dms", sc.TickMs)
		plog.Infof("election = %dms", sc.ElectionTicks*int(sc.TickMs))
		plog.Infof("snapshot count = %d", sc.SnapshotCount)
		if len(sc.DiscoveryURL) != 0 {
			plog.Infof("discovery URL= %s", sc.DiscoveryURL)
			if len(sc.DiscoveryProxy) != 0 {
				plog.Infof("discovery proxy = %s", sc.DiscoveryProxy)
			}
		}
		plog.Infof("advertise client URLs = %s", sc.ClientURLs)
		if memberInitialized {
			plog.Infof("initial advertise peer URLs = %s", sc.PeerURLs)
			plog.Infof("initial cluster = %s", sc.InitialPeerURLsMap)
		}
	} else {
		// sort CORS/whitelist entries for deterministic log output
		cors := make([]string, 0, len(ec.CORS))
		for v := range ec.CORS {
			cors = append(cors, v)
		}
		sort.Strings(cors)

		hss := make([]string, 0, len(ec.HostWhitelist))
		for v := range ec.HostWhitelist {
			hss = append(hss, v)
		}
		sort.Strings(hss)

		// report the effective quota, substituting the default for "unset"
		quota := ec.QuotaBackendBytes
		if quota == 0 {
			quota = etcdserver.DefaultQuotaBytes
		}

		lg.Info(
			"starting an etcd server",
			zap.String("etcd-version", version.Version),
			zap.String("git-sha", version.GitSHA),
			zap.String("go-version", runtime.Version()),
			zap.String("go-os", runtime.GOOS),
			zap.String("go-arch", runtime.GOARCH),
			zap.Int("max-cpu-set", runtime.GOMAXPROCS(0)),
			zap.Int("max-cpu-available", runtime.NumCPU()),
			zap.Bool("member-initialized", memberInitialized),
			zap.String("name", sc.Name),
			zap.String("data-dir", sc.DataDir),
			zap.String("wal-dir", ec.WalDir),
			zap.String("wal-dir-dedicated", sc.DedicatedWALDir),
			zap.String("member-dir", sc.MemberDir()),
			zap.Bool("force-new-cluster", sc.ForceNewCluster),
			zap.String("heartbeat-interval", fmt.Sprintf("%v", time.Duration(sc.TickMs)*time.Millisecond)),
			zap.String("election-timeout", fmt.Sprintf("%v", time.Duration(sc.ElectionTicks*int(sc.TickMs))*time.Millisecond)),
			zap.Bool("initial-election-tick-advance", sc.InitialElectionTickAdvance),
			zap.Uint64("snapshot-count", sc.SnapshotCount),
			zap.Uint64("snapshot-catchup-entries", sc.SnapshotCatchUpEntries),
			zap.Strings("initial-advertise-peer-urls", ec.getAPURLs()),
			zap.Strings("listen-peer-urls", ec.getLPURLs()),
			zap.Strings("advertise-client-urls", ec.getACURLs()),
			zap.Strings("listen-client-urls", ec.getLCURLs()),
			zap.Strings("listen-metrics-urls", ec.getMetricsURLs()),
			zap.Strings("cors", cors),
			zap.Strings("host-whitelist", hss),
			zap.String("initial-cluster", sc.InitialPeerURLsMap.String()),
			zap.String("initial-cluster-state", ec.ClusterState),
			zap.String("initial-cluster-token", sc.InitialClusterToken),
			zap.Int64("quota-size-bytes", quota),
			zap.Bool("pre-vote", sc.PreVote),
			zap.Bool("initial-corrupt-check", sc.InitialCorruptCheck),
			zap.String("corrupt-check-time-interval", sc.CorruptCheckTime.String()),
			zap.String("auto-compaction-mode", sc.AutoCompactionMode),
			zap.Duration("auto-compaction-retention", sc.AutoCompactionRetention),
			zap.String("auto-compaction-interval", sc.AutoCompactionRetention.String()),
			zap.String("discovery-url", sc.DiscoveryURL),
			zap.String("discovery-proxy", sc.DiscoveryProxy),
		)
	}
}
346
347// Config returns the current configuration.
348func (e *Etcd) Config() Config {
349	return e.cfg
350}
351
// Close gracefully shuts down all servers/listeners.
// Client requests will be terminated with request timeout.
// After timeout, enforce remaining requests be closed immediately.
func (e *Etcd) Close() {
	fields := []zap.Field{
		zap.String("name", e.cfg.Name),
		zap.String("data-dir", e.cfg.Dir),
		zap.Strings("advertise-peer-urls", e.cfg.getAPURLs()),
		zap.Strings("advertise-client-urls", e.cfg.getACURLs()),
	}
	lg := e.GetLogger()
	if lg != nil {
		lg.Info("closing etcd server", fields...)
	}
	defer func() {
		if lg != nil {
			lg.Info("closed etcd server", fields...)
			lg.Sync()
		}
	}()

	// idempotent: signal shutdown exactly once, even if Close races
	e.closeOnce.Do(func() { close(e.stopc) })

	// close client requests with request timeout
	timeout := 2 * time.Second
	if e.Server != nil {
		timeout = e.Server.Cfg.ReqTimeout()
	}
	for _, sctx := range e.sctxs {
		// serversC is closed once serving starts (or on startup error),
		// so this range terminates after draining registered servers
		for ss := range sctx.serversC {
			ctx, cancel := context.WithTimeout(context.Background(), timeout)
			stopServers(ctx, ss)
			cancel()
		}
	}

	// cancel client serve contexts (stops gateways etc.)
	for _, sctx := range e.sctxs {
		sctx.cancel()
	}

	for i := range e.Clients {
		if e.Clients[i] != nil {
			e.Clients[i].Close()
		}
	}

	for i := range e.metricsListeners {
		e.metricsListeners[i].Close()
	}

	// close rafthttp transports
	if e.Server != nil {
		e.Server.Stop()
	}

	// close all idle connections in peer handler (wait up to 1-second)
	for i := range e.Peers {
		if e.Peers[i] != nil && e.Peers[i].close != nil {
			ctx, cancel := context.WithTimeout(context.Background(), time.Second)
			e.Peers[i].close(ctx)
			cancel()
		}
	}
}
416
// stopServers shuts down a gRPC/HTTP server pair sharing one listener.
// Insecure servers are given a graceful stop (waiting for pending RPCs)
// bounded by ctx; TLS-secured servers are stopped immediately because
// GracefulStop can hang on TLS connections (see links below).
func stopServers(ctx context.Context, ss *servers) {
	shutdownNow := func() {
		// first, close the http.Server
		ss.http.Shutdown(ctx)
		// then close grpc.Server; cancels all active RPCs
		ss.grpc.Stop()
	}

	// do not grpc.Server.GracefulStop with TLS enabled etcd server
	// See https://github.com/grpc/grpc-go/issues/1384#issuecomment-317124531
	// and https://github.com/etcd-io/etcd/issues/8916
	if ss.secure {
		shutdownNow()
		return
	}

	ch := make(chan struct{})
	go func() {
		defer close(ch)
		// close listeners to stop accepting new connections,
		// will block on any existing transports
		ss.grpc.GracefulStop()
	}()

	// wait until all pending RPCs are finished
	select {
	case <-ch:
	case <-ctx.Done():
		// took too long, manually close open transports
		// e.g. watch streams
		shutdownNow()

		// concurrent GracefulStop should be interrupted
		<-ch
	}
}
453
454func (e *Etcd) Err() <-chan error { return e.errc }
455
456func configurePeerListeners(cfg *Config) (peers []*peerListener, err error) {
457	if err = updateCipherSuites(&cfg.PeerTLSInfo, cfg.CipherSuites); err != nil {
458		return nil, err
459	}
460	if err = cfg.PeerSelfCert(); err != nil {
461		if cfg.logger != nil {
462			cfg.logger.Fatal("failed to get peer self-signed certs", zap.Error(err))
463		} else {
464			plog.Fatalf("could not get certs (%v)", err)
465		}
466	}
467	if !cfg.PeerTLSInfo.Empty() {
468		if cfg.logger != nil {
469			cfg.logger.Info(
470				"starting with peer TLS",
471				zap.String("tls-info", fmt.Sprintf("%+v", cfg.PeerTLSInfo)),
472				zap.Strings("cipher-suites", cfg.CipherSuites),
473			)
474		} else {
475			plog.Infof("peerTLS: %s", cfg.PeerTLSInfo)
476		}
477	}
478
479	peers = make([]*peerListener, len(cfg.LPUrls))
480	defer func() {
481		if err == nil {
482			return
483		}
484		for i := range peers {
485			if peers[i] != nil && peers[i].close != nil {
486				if cfg.logger != nil {
487					cfg.logger.Warn(
488						"closing peer listener",
489						zap.String("address", cfg.LPUrls[i].String()),
490						zap.Error(err),
491					)
492				} else {
493					plog.Info("stopping listening for peers on ", cfg.LPUrls[i].String())
494				}
495				ctx, cancel := context.WithTimeout(context.Background(), time.Second)
496				peers[i].close(ctx)
497				cancel()
498			}
499		}
500	}()
501
502	for i, u := range cfg.LPUrls {
503		if u.Scheme == "http" {
504			if !cfg.PeerTLSInfo.Empty() {
505				if cfg.logger != nil {
506					cfg.logger.Warn("scheme is HTTP while key and cert files are present; ignoring key and cert files", zap.String("peer-url", u.String()))
507				} else {
508					plog.Warningf("The scheme of peer url %s is HTTP while peer key/cert files are presented. Ignored peer key/cert files.", u.String())
509				}
510			}
511			if cfg.PeerTLSInfo.ClientCertAuth {
512				if cfg.logger != nil {
513					cfg.logger.Warn("scheme is HTTP while --peer-client-cert-auth is enabled; ignoring client cert auth for this URL", zap.String("peer-url", u.String()))
514				} else {
515					plog.Warningf("The scheme of peer url %s is HTTP while client cert auth (--peer-client-cert-auth) is enabled. Ignored client cert auth for this url.", u.String())
516				}
517			}
518		}
519		peers[i] = &peerListener{close: func(context.Context) error { return nil }}
520		peers[i].Listener, err = rafthttp.NewListener(u, &cfg.PeerTLSInfo)
521		if err != nil {
522			return nil, err
523		}
524		// once serve, overwrite with 'http.Server.Shutdown'
525		peers[i].close = func(context.Context) error {
526			return peers[i].Listener.Close()
527		}
528	}
529	return peers, nil
530}
531
// servePeers configures and starts peer handlers; must be called after the
// rafthttp.Transport has started (i.e. after EtcdServer.Start).
func (e *Etcd) servePeers() (err error) {
	ph := etcdhttp.NewPeerHandler(e.GetLogger(), e.Server)
	var peerTLScfg *tls.Config
	if !e.cfg.PeerTLSInfo.Empty() {
		if peerTLScfg, err = e.cfg.PeerTLSInfo.ServerConfig(); err != nil {
			return err
		}
	}

	for _, p := range e.Peers {
		u := p.Listener.Addr().String()
		gs := v3rpc.Server(e.Server, peerTLScfg)
		// cmux multiplexes gRPC (HTTP/2) and plain HTTP on one listener
		m := cmux.New(p.Listener)
		go gs.Serve(m.Match(cmux.HTTP2()))
		srv := &http.Server{
			Handler:     grpcHandlerFunc(gs, ph),
			ReadTimeout: 5 * time.Minute,
			ErrorLog:    defaultLog.New(ioutil.Discard, "", 0), // do not log user error
		}
		go srv.Serve(m.Match(cmux.Any()))
		// u, gs, srv, and m are declared inside the loop body, so each
		// closure below is bound to its own listener's values.
		p.serve = func() error { return m.Serve() }
		p.close = func(ctx context.Context) error {
			// gracefully shutdown http.Server
			// close open listeners, idle connections
			// until context cancel or time-out
			if e.cfg.logger != nil {
				e.cfg.logger.Info(
					"stopping serving peer traffic",
					zap.String("address", u),
				)
			}
			stopServers(ctx, &servers{secure: peerTLScfg != nil, grpc: gs, http: srv})
			if e.cfg.logger != nil {
				e.cfg.logger.Info(
					"stopped serving peer traffic",
					zap.String("address", u),
				)
			}
			return nil
		}
	}

	// start peer servers in a goroutine
	for _, pl := range e.Peers {
		go func(l *peerListener) {
			u := l.Addr().String()
			if e.cfg.logger != nil {
				e.cfg.logger.Info(
					"serving peer traffic",
					zap.String("address", u),
				)
			} else {
				plog.Info("listening for peers on ", u)
			}
			e.errHandler(l.serve())
		}(pl)
	}
	return nil
}
592
593func configureClientListeners(cfg *Config) (sctxs map[string]*serveCtx, err error) {
594	if err = updateCipherSuites(&cfg.ClientTLSInfo, cfg.CipherSuites); err != nil {
595		return nil, err
596	}
597	if err = cfg.ClientSelfCert(); err != nil {
598		if cfg.logger != nil {
599			cfg.logger.Fatal("failed to get client self-signed certs", zap.Error(err))
600		} else {
601			plog.Fatalf("could not get certs (%v)", err)
602		}
603	}
604	if cfg.EnablePprof {
605		if cfg.logger != nil {
606			cfg.logger.Info("pprof is enabled", zap.String("path", debugutil.HTTPPrefixPProf))
607		} else {
608			plog.Infof("pprof is enabled under %s", debugutil.HTTPPrefixPProf)
609		}
610	}
611
612	sctxs = make(map[string]*serveCtx)
613	for _, u := range cfg.LCUrls {
614		sctx := newServeCtx(cfg.logger)
615		if u.Scheme == "http" || u.Scheme == "unix" {
616			if !cfg.ClientTLSInfo.Empty() {
617				if cfg.logger != nil {
618					cfg.logger.Warn("scheme is HTTP while key and cert files are present; ignoring key and cert files", zap.String("client-url", u.String()))
619				} else {
620					plog.Warningf("The scheme of client url %s is HTTP while peer key/cert files are presented. Ignored key/cert files.", u.String())
621				}
622			}
623			if cfg.ClientTLSInfo.ClientCertAuth {
624				if cfg.logger != nil {
625					cfg.logger.Warn("scheme is HTTP while --client-cert-auth is enabled; ignoring client cert auth for this URL", zap.String("client-url", u.String()))
626				} else {
627					plog.Warningf("The scheme of client url %s is HTTP while client cert auth (--client-cert-auth) is enabled. Ignored client cert auth for this url.", u.String())
628				}
629			}
630		}
631		if (u.Scheme == "https" || u.Scheme == "unixs") && cfg.ClientTLSInfo.Empty() {
632			return nil, fmt.Errorf("TLS key/cert (--cert-file, --key-file) must be provided for client url %s with HTTPS scheme", u.String())
633		}
634
635		network := "tcp"
636		addr := u.Host
637		if u.Scheme == "unix" || u.Scheme == "unixs" {
638			network = "unix"
639			addr = u.Host + u.Path
640		}
641		sctx.network = network
642
643		sctx.secure = u.Scheme == "https" || u.Scheme == "unixs"
644		sctx.insecure = !sctx.secure
645		if oldctx := sctxs[addr]; oldctx != nil {
646			oldctx.secure = oldctx.secure || sctx.secure
647			oldctx.insecure = oldctx.insecure || sctx.insecure
648			continue
649		}
650
651		if sctx.l, err = net.Listen(network, addr); err != nil {
652			return nil, err
653		}
654		// net.Listener will rewrite ipv4 0.0.0.0 to ipv6 [::], breaking
655		// hosts that disable ipv6. So, use the address given by the user.
656		sctx.addr = addr
657
658		if fdLimit, fderr := runtimeutil.FDLimit(); fderr == nil {
659			if fdLimit <= reservedInternalFDNum {
660				if cfg.logger != nil {
661					cfg.logger.Fatal(
662						"file descriptor limit of etcd process is too low; please set higher",
663						zap.Uint64("limit", fdLimit),
664						zap.Int("recommended-limit", reservedInternalFDNum),
665					)
666				} else {
667					plog.Fatalf("file descriptor limit[%d] of etcd process is too low, and should be set higher than %d to ensure internal usage", fdLimit, reservedInternalFDNum)
668				}
669			}
670			sctx.l = transport.LimitListener(sctx.l, int(fdLimit-reservedInternalFDNum))
671		}
672
673		if network == "tcp" {
674			if sctx.l, err = transport.NewKeepAliveListener(sctx.l, network, nil); err != nil {
675				return nil, err
676			}
677		}
678
679		defer func() {
680			if err == nil {
681				return
682			}
683			sctx.l.Close()
684			if cfg.logger != nil {
685				cfg.logger.Warn(
686					"closing peer listener",
687					zap.String("address", u.Host),
688					zap.Error(err),
689				)
690			} else {
691				plog.Info("stopping listening for client requests on ", u.Host)
692			}
693		}()
694		for k := range cfg.UserHandlers {
695			sctx.userHandlers[k] = cfg.UserHandlers[k]
696		}
697		sctx.serviceRegister = cfg.ServiceRegister
698		if cfg.EnablePprof || cfg.Debug {
699			sctx.registerPprof()
700		}
701		if cfg.Debug {
702			sctx.registerTrace()
703		}
704		sctxs[addr] = sctx
705	}
706	return sctxs, nil
707}
708
// serveClients builds the shared HTTP handler (v2 API or basic endpoints)
// and gRPC options, then starts one serving goroutine per client listener.
func (e *Etcd) serveClients() (err error) {
	if !e.cfg.ClientTLSInfo.Empty() {
		if e.cfg.logger != nil {
			e.cfg.logger.Info(
				"starting with client TLS",
				zap.String("tls-info", fmt.Sprintf("%+v", e.cfg.ClientTLSInfo)),
				zap.Strings("cipher-suites", e.cfg.CipherSuites),
			)
		} else {
			plog.Infof("ClientTLS: %s", e.cfg.ClientTLSInfo)
		}
	}

	// Start a client server goroutine for each listen address
	var h http.Handler
	if e.Config().EnableV2 {
		if len(e.Config().ExperimentalEnableV2V3) > 0 {
			// serve the v2 API on top of the v3 backend
			srv := v2v3.NewServer(e.cfg.logger, v3client.New(e.Server), e.cfg.ExperimentalEnableV2V3)
			h = v2http.NewClientHandler(e.GetLogger(), srv, e.Server.Cfg.ReqTimeout())
		} else {
			h = v2http.NewClientHandler(e.GetLogger(), e.Server, e.Server.Cfg.ReqTimeout())
		}
	} else {
		// v2 disabled: expose only the basic endpoints (version, health, ...)
		mux := http.NewServeMux()
		etcdhttp.HandleBasic(mux, e.Server)
		h = mux
	}

	// optional gRPC keepalive enforcement/parameters from config
	gopts := []grpc.ServerOption{}
	if e.cfg.GRPCKeepAliveMinTime > time.Duration(0) {
		gopts = append(gopts, grpc.KeepaliveEnforcementPolicy(keepalive.EnforcementPolicy{
			MinTime:             e.cfg.GRPCKeepAliveMinTime,
			PermitWithoutStream: false,
		}))
	}
	if e.cfg.GRPCKeepAliveInterval > time.Duration(0) &&
		e.cfg.GRPCKeepAliveTimeout > time.Duration(0) {
		gopts = append(gopts, grpc.KeepaliveParams(keepalive.ServerParameters{
			Time:    e.cfg.GRPCKeepAliveInterval,
			Timeout: e.cfg.GRPCKeepAliveTimeout,
		}))
	}

	// start client servers in each goroutine
	for _, sctx := range e.sctxs {
		go func(s *serveCtx) {
			e.errHandler(s.serve(e.Server, &e.cfg.ClientTLSInfo, h, e.errHandler, gopts...))
		}(sctx)
	}
	return nil
}
760
// serveMetrics starts an HTTP server for metrics/health on each URL in
// --listen-metrics-urls; metrics URLs reuse the client TLS configuration
// unless the scheme is plain http.
func (e *Etcd) serveMetrics() (err error) {
	if e.cfg.Metrics == "extensive" {
		grpc_prometheus.EnableHandlingTimeHistogram()
	}

	if len(e.cfg.ListenMetricsUrls) > 0 {
		metricsMux := http.NewServeMux()
		etcdhttp.HandleMetricsHealth(metricsMux, e.Server)

		for _, murl := range e.cfg.ListenMetricsUrls {
			tlsInfo := &e.cfg.ClientTLSInfo
			if murl.Scheme == "http" {
				tlsInfo = nil
			}
			ml, err := transport.NewListener(murl.Host, murl.Scheme, tlsInfo)
			if err != nil {
				return err
			}
			e.metricsListeners = append(e.metricsListeners, ml)
			// murl/ml are passed as arguments so each goroutine is bound
			// to its own iteration's values
			go func(u url.URL, ln net.Listener) {
				if e.cfg.logger != nil {
					e.cfg.logger.Info(
						"serving metrics",
						zap.String("address", u.String()),
					)
				} else {
					plog.Info("listening for metrics on ", u.String())
				}
				e.errHandler(http.Serve(ln, metricsMux))
			}(murl, ml)
		}
	}
	return nil
}
795
// errHandler forwards a fatal serve error to e.errc unless the server is
// shutting down, in which case the error is dropped.
func (e *Etcd) errHandler(err error) {
	// fast path: if already stopping, drop the error without blocking
	select {
	case <-e.stopc:
		return
	default:
	}
	// otherwise block until the error is consumed (errc is buffered; see
	// StartEtcd) or shutdown begins
	select {
	case <-e.stopc:
	case e.errc <- err:
	}
}
807
808// GetLogger returns the logger.
809func (e *Etcd) GetLogger() *zap.Logger {
810	e.cfg.loggerMu.RLock()
811	l := e.cfg.logger
812	e.cfg.loggerMu.RUnlock()
813	return l
814}
815
816func parseCompactionRetention(mode, retention string) (ret time.Duration, err error) {
817	h, err := strconv.Atoi(retention)
818	if err == nil && h >= 0 {
819		switch mode {
820		case CompactorModeRevision:
821			ret = time.Duration(int64(h))
822		case CompactorModePeriodic:
823			ret = time.Duration(int64(h)) * time.Hour
824		}
825	} else {
826		// periodic compaction
827		ret, err = time.ParseDuration(retention)
828		if err != nil {
829			return 0, fmt.Errorf("error parsing CompactionRetention: %v", err)
830		}
831	}
832	return ret, nil
833}
834