1package connect
2
3import (
4	"context"
5	"crypto/tls"
6	"crypto/x509"
7	"errors"
8	"net"
9	"net/http"
10	"time"
11
12	"github.com/hashicorp/consul/api"
13	"github.com/hashicorp/consul/api/watch"
14	"github.com/hashicorp/consul/logging"
15	"github.com/hashicorp/go-hclog"
16	"golang.org/x/net/http2"
17)
18
// Service represents a Consul service that accepts and/or connects via Connect.
// This can represent a service that only is a server, only is a client, or
// both.
//
// Instances are built by the New* constructors in this file. The constructors
// that take an API client also start background watches for root and leaf
// certificates; call Close to stop them.
//
// TODO(banks): Agent implicit health checks based on knowing which certs are
// available should prevent clients being routed until the agent knows the
// service has been delivered valid certificates. Once built, document that here
// too.
type Service struct {
	// service is the name (not ID) for the Consul service. This is used to request
	// Connect metadata.
	service string

	// client is the Consul API client. It must be configured with an appropriate
	// Token that has `service:write` policy on the provided service. If an
	// insufficient token is provided, the Service will abort further attempts to
	// fetch certificates and print a loud error message. It will not Close() or
	// kill the process since that could lead to a crash loop in every service if
	// ACL token was revoked. All attempts to dial will error and any incoming
	// connections will fail to verify. It may be nil if the Service is being
	// configured from local files for development or testing.
	client *api.Client

	// tlsCfg is the dynamic TLS config. The watch handlers push new roots and
	// leaf certificates into it, and ServerTLSConfig and Dial read from it.
	tlsCfg *dynamicTLSConfig

	// httpResolverFromAddr is a function that returns a Resolver from a string
	// address for HTTP clients. It's privately pluggable to make testing easier
	// but will default to a simple method to parse the host as a Consul DNS host.
	// It is left nil by the dev constructors, in which case HTTPDialTLS returns
	// an error.
	httpResolverFromAddr func(addr string) (Resolver, error)

	// rootsWatch and leafWatch are the watch plans for the "connect_roots" and
	// "connect_leaf" watch types respectively. They are only set by
	// NewServiceWithConfig (nil for dev services) and are stopped by Close.
	rootsWatch *watch.Plan
	leafWatch  *watch.Plan

	// logger receives the Service's debug and error output and is also handed
	// to the watch plans when they are started.
	logger hclog.Logger
}
55
// Config represents the configuration options for a service. It is consumed by
// NewServiceWithConfig.
type Config struct {
	// Client is the mandatory Consul API client. Will panic if not set.
	Client *api.Client
	// Logger is the logger to use. If nil a default logger will be used.
	Logger hclog.Logger
	// ServerNextProtos are the protocols advertised via ALPN. If nil, defaults to
	// ["h2"] for backwards compatibility. Usually there is no need to change this,
	// see https://github.com/hashicorp/consul/issues/4466 for some discussion on why
	// this can be useful.
	ServerNextProtos []string
}
68
69// NewServiceWithConfig starts a service with the specified Config.
70func NewServiceWithConfig(serviceName string, config Config) (*Service, error) {
71	if config.Logger == nil {
72		config.Logger = hclog.New(&hclog.LoggerOptions{})
73	}
74	tlsCfg := defaultTLSConfig()
75	if config.ServerNextProtos != nil {
76		tlsCfg.NextProtos = config.ServerNextProtos
77	}
78	s := &Service{
79		service:              serviceName,
80		client:               config.Client,
81		logger:               config.Logger.Named(logging.Connect).With("service", serviceName),
82		tlsCfg:               newDynamicTLSConfig(tlsCfg, config.Logger),
83		httpResolverFromAddr: ConsulResolverFromAddrFunc(config.Client),
84	}
85
86	// Set up root and leaf watches
87	p, err := watch.Parse(map[string]interface{}{
88		"type": "connect_roots",
89	})
90	if err != nil {
91		return nil, err
92	}
93	s.rootsWatch = p
94	s.rootsWatch.HybridHandler = s.rootsWatchHandler
95
96	p, err = watch.Parse(map[string]interface{}{
97		"type":    "connect_leaf",
98		"service": s.service,
99	})
100	if err != nil {
101		return nil, err
102	}
103	s.leafWatch = p
104	s.leafWatch.HybridHandler = s.leafWatchHandler
105
106	go s.rootsWatch.RunWithClientAndHclog(config.Client, s.logger)
107	go s.leafWatch.RunWithClientAndHclog(config.Client, s.logger)
108
109	return s, nil
110}
111
112// NewService creates and starts a Service. The caller must close the returned
113// service to free resources and allow the program to exit normally. This is
114// typically called in a signal handler.
115//
116// Caller must provide client which is already configured to speak to the local
117// Consul agent, and with an ACL token that has `service:write` privileges for
118// the service specified.
119func NewService(serviceName string, client *api.Client) (*Service, error) {
120	return NewServiceWithConfig(serviceName, Config{Client: client})
121}
122
123// NewServiceWithLogger starts the service with a specified log.Logger.
124func NewServiceWithLogger(serviceName string, client *api.Client,
125	logger hclog.Logger) (*Service, error) {
126	return NewServiceWithConfig(serviceName, Config{Client: client, Logger: logger})
127}
128
129// NewDevServiceFromCertFiles creates a Service using certificate and key files
130// passed instead of fetching them from the client.
131func NewDevServiceFromCertFiles(serviceID string, logger hclog.Logger,
132	caFile, certFile, keyFile string) (*Service, error) {
133
134	tlsCfg, err := devTLSConfigFromFiles(caFile, certFile, keyFile)
135	if err != nil {
136		return nil, err
137	}
138	return NewDevServiceWithTLSConfig(serviceID, logger, tlsCfg)
139}
140
141// NewDevServiceWithTLSConfig creates a Service using static TLS config passed.
142// It's mostly useful for testing.
143func NewDevServiceWithTLSConfig(serviceName string, logger hclog.Logger,
144	tlsCfg *tls.Config) (*Service, error) {
145	s := &Service{
146		service: serviceName,
147		logger:  logger,
148		tlsCfg:  newDynamicTLSConfig(tlsCfg, logger),
149	}
150	return s, nil
151}
152
153// Name returns the name of the service this object represents. Note it is the
154// service _name_ as used during discovery, not the ID used to uniquely identify
155// an instance of the service with an agent.
156func (s *Service) Name() string {
157	return s.service
158}
159
160// ServerTLSConfig returns a *tls.Config that allows any TCP listener to accept
161// and authorize incoming Connect clients. It will return a single static config
162// with hooks to dynamically load certificates, and perform Connect
163// authorization during verification. Service implementations do not need to
164// reload this to get new certificates.
165//
166// At any time it may be possible that the Service instance does not have access
167// to usable certificates due to not being initially setup yet or a prolonged
168// error during renewal. The listener will be able to accept connections again
169// once connectivity is restored provided the client's Token is valid.
170//
171// To prevent routing traffic to the app instance while it's certificates are
172// invalid or not populated yet you may use Ready in a health check endpoint
173// and/or ReadyWait during startup before starting the TLS listener. The latter
174// only prevents connections during initial bootstrap (including permission
175// issues where certs can never be issued due to bad credentials) but won't
176// handle the case that certificates expire and an error prevents timely
177// renewal.
178func (s *Service) ServerTLSConfig() *tls.Config {
179	return s.tlsCfg.Get(newServerSideVerifier(s.logger, s.client, s.service))
180}
181
182// Dial connects to a remote Connect-enabled server. The passed Resolver is used
183// to discover a single candidate instance which will be dialed and have it's
184// TLS certificate verified against the expected identity. Failures are returned
185// directly with no retries. Repeated dials may use different instances
186// depending on the Resolver implementation.
187//
188// Timeout can be managed via the Context.
189//
190// Calls to Dial made before the Service has loaded certificates from the agent
191// will fail. You can prevent this by using Ready or ReadyWait in app during
192// startup.
193func (s *Service) Dial(ctx context.Context, resolver Resolver) (net.Conn, error) {
194	addr, certURI, err := resolver.Resolve(ctx)
195	if err != nil {
196		return nil, err
197	}
198	s.logger.Debug("resolved service instance",
199		"address", addr,
200		"identity", certURI.URI(),
201	)
202	var dialer net.Dialer
203	tcpConn, err := dialer.DialContext(ctx, "tcp", addr)
204	if err != nil {
205		return nil, err
206	}
207
208	tlsConn := tls.Client(tcpConn, s.tlsCfg.Get(clientSideVerifier))
209	// Set deadline for Handshake to complete.
210	deadline, ok := ctx.Deadline()
211	if ok {
212		tlsConn.SetDeadline(deadline)
213	}
214	// Perform handshake
215	if err = tlsConn.Handshake(); err != nil {
216		tlsConn.Close()
217		return nil, err
218	}
219	// Clear deadline since that was only for connection. Caller can set their own
220	// deadline later as necessary.
221	tlsConn.SetDeadline(time.Time{})
222
223	// Verify that the connect server's URI matches certURI
224	err = verifyServerCertMatchesURI(tlsConn.ConnectionState().PeerCertificates,
225		certURI)
226	if err != nil {
227		tlsConn.Close()
228		return nil, err
229	}
230	s.logger.Debug("successfully connected to service instance",
231		"address", addr,
232		"identity", certURI.URI(),
233	)
234	return tlsConn, nil
235}
236
237// HTTPDialTLS is compatible with http.Transport.DialTLS. It expects the addr
238// hostname to be specified using Consul DNS query syntax, e.g.
239// "web.service.consul". It converts that into the equivalent ConsulResolver and
240// then call s.Dial with the resolver. This is low level, clients should
241// typically use HTTPClient directly.
242func (s *Service) HTTPDialTLS(network,
243	addr string) (net.Conn, error) {
244	if s.httpResolverFromAddr == nil {
245		return nil, errors.New("no http resolver configured")
246	}
247	r, err := s.httpResolverFromAddr(addr)
248	if err != nil {
249		return nil, err
250	}
251	// TODO(banks): figure out how to do timeouts better.
252	return s.Dial(context.Background(), r)
253}
254
255// HTTPClient returns an *http.Client configured to dial remote Consul Connect
256// HTTP services. The client will return an error if attempting to make requests
257// to a non HTTPS hostname. It resolves the domain of the request with the same
258// syntax as Consul DNS queries although it performs discovery directly via the
259// API rather than just relying on Consul DNS. Hostnames that are not valid
260// Consul DNS queries will fail.
261func (s *Service) HTTPClient() *http.Client {
262	t := &http.Transport{
263		// Sadly we can't use DialContext hook since that is expected to return a
264		// plain TCP connection and http.Client tries to start a TLS handshake over
265		// it. We need to control the handshake to be able to do our validation.
266		// So we have to use the older DialTLS which means no context/timeout
267		// support.
268		//
269		// TODO(banks): figure out how users can configure a timeout when using
270		// this and/or compatibility with http.Request.WithContext.
271		DialTLS: s.HTTPDialTLS,
272	}
273	// Need to manually re-enable http2 support since we set custom DialTLS.
274	// See https://golang.org/src/net/http/transport.go?s=8692:9036#L228
275	http2.ConfigureTransport(t)
276	return &http.Client{
277		Transport: t,
278	}
279}
280
281// Close stops the service and frees resources.
282func (s *Service) Close() error {
283	if s.rootsWatch != nil {
284		s.rootsWatch.Stop()
285	}
286	if s.leafWatch != nil {
287		s.leafWatch.Stop()
288	}
289	return nil
290}
291
292func (s *Service) rootsWatchHandler(blockParam watch.BlockingParamVal, raw interface{}) {
293	if raw == nil {
294		return
295	}
296	v, ok := raw.(*api.CARootList)
297	if !ok || v == nil {
298		s.logger.Error("got invalid response from root watch")
299		return
300	}
301
302	// Got new root certificates, update the tls.Configs.
303	roots := x509.NewCertPool()
304	for _, root := range v.Roots {
305		roots.AppendCertsFromPEM([]byte(root.RootCertPEM))
306	}
307
308	s.tlsCfg.SetRoots(roots)
309}
310
311func (s *Service) leafWatchHandler(blockParam watch.BlockingParamVal, raw interface{}) {
312	if raw == nil {
313		return // ignore
314	}
315	v, ok := raw.(*api.LeafCert)
316	if !ok || v == nil {
317		s.logger.Error("got invalid response from leaf watch")
318		return
319	}
320
321	// Got new leaf, update the tls.Configs
322	cert, err := tls.X509KeyPair([]byte(v.CertPEM), []byte(v.PrivateKeyPEM))
323	if err != nil {
324		s.logger.Error("failed to parse new leaf cert", "error", err)
325		return
326	}
327
328	s.tlsCfg.SetLeaf(&cert)
329}
330
331// Ready returns whether or not both roots and a leaf certificate are
332// configured. If both are non-nil, they are assumed to be valid and usable.
333func (s *Service) Ready() bool {
334	return s.tlsCfg.Ready()
335}
336
337// ReadyWait returns a chan that is closed when the Service becomes ready
338// for use for the first time. Note that if the Service is ready when it is
339// called it returns a nil chan. Ready means that it has root and leaf
340// certificates configured which we assume are valid. The service may
341// subsequently stop being "ready" if it's certificates expire or are revoked
342// and an error prevents new ones being loaded but this method will not stop
343// returning a nil chan in that case. It is only useful for initial startup. For
344// ongoing health Ready() should be used.
345func (s *Service) ReadyWait() <-chan struct{} {
346	return s.tlsCfg.ReadyWait()
347}
348