1package connect 2 3import ( 4 "context" 5 "crypto/tls" 6 "crypto/x509" 7 "errors" 8 "net" 9 "net/http" 10 "time" 11 12 "github.com/hashicorp/consul/api" 13 "github.com/hashicorp/consul/api/watch" 14 "github.com/hashicorp/consul/logging" 15 "github.com/hashicorp/go-hclog" 16 "golang.org/x/net/http2" 17) 18 19// Service represents a Consul service that accepts and/or connects via Connect. 20// This can represent a service that only is a server, only is a client, or 21// both. 22// 23// TODO(banks): Agent implicit health checks based on knowing which certs are 24// available should prevent clients being routed until the agent knows the 25// service has been delivered valid certificates. Once built, document that here 26// too. 27type Service struct { 28 // service is the name (not ID) for the Consul service. This is used to request 29 // Connect metadata. 30 service string 31 32 // client is the Consul API client. It must be configured with an appropriate 33 // Token that has `service:write` policy on the provided service. If an 34 // insufficient token is provided, the Service will abort further attempts to 35 // fetch certificates and print a loud error message. It will not Close() or 36 // kill the process since that could lead to a crash loop in every service if 37 // ACL token was revoked. All attempts to dial will error and any incoming 38 // connections will fail to verify. It may be nil if the Service is being 39 // configured from local files for development or testing. 40 client *api.Client 41 42 // tlsCfg is the dynamic TLS config 43 tlsCfg *dynamicTLSConfig 44 45 // httpResolverFromAddr is a function that returns a Resolver from a string 46 // address for HTTP clients. It's privately pluggable to make testing easier 47 // but will default to a simple method to parse the host as a Consul DNS host. 48 httpResolverFromAddr func(addr string) (Resolver, error) 49 50 rootsWatch *watch.Plan 51 leafWatch *watch.Plan 52 53 logger hclog.Logger 54} 55 56// Config represents the configuration options for a service. 57type Config struct { 58 // client is the mandatory Consul API client. Will panic if not set. 59 Client *api.Client 60 // Logger is the logger to use. If nil a default logger will be used. 61 Logger hclog.Logger 62 // ServerNextProtos are the protocols advertised via ALPN. If nil, defaults to 63 // ["h2"] for backwards compatibility. Usually there is no need to change this, 64 // see https://github.com/hashicorp/consul/issues/4466 for some discussion on why 65 // this can be useful. 66 ServerNextProtos []string 67} 68 69// NewServiceWithConfig starts a service with the specified Config. 70func NewServiceWithConfig(serviceName string, config Config) (*Service, error) { 71 if config.Logger == nil { 72 config.Logger = hclog.New(&hclog.LoggerOptions{}) 73 } 74 tlsCfg := defaultTLSConfig() 75 if config.ServerNextProtos != nil { 76 tlsCfg.NextProtos = config.ServerNextProtos 77 } 78 s := &Service{ 79 service: serviceName, 80 client: config.Client, 81 logger: config.Logger.Named(logging.Connect).With("service", serviceName), 82 tlsCfg: newDynamicTLSConfig(tlsCfg, config.Logger), 83 httpResolverFromAddr: ConsulResolverFromAddrFunc(config.Client), 84 } 85 86 // Set up root and leaf watches 87 p, err := watch.Parse(map[string]interface{}{ 88 "type": "connect_roots", 89 }) 90 if err != nil { 91 return nil, err 92 } 93 s.rootsWatch = p 94 s.rootsWatch.HybridHandler = s.rootsWatchHandler 95 96 p, err = watch.Parse(map[string]interface{}{ 97 "type": "connect_leaf", 98 "service": s.service, 99 }) 100 if err != nil { 101 return nil, err 102 } 103 s.leafWatch = p 104 s.leafWatch.HybridHandler = s.leafWatchHandler 105 106 go s.rootsWatch.RunWithClientAndHclog(config.Client, s.logger) 107 go s.leafWatch.RunWithClientAndHclog(config.Client, s.logger) 108 109 return s, nil 110} 111 112// NewService creates and starts a Service. The caller must close the returned 113// service to free resources and allow the program to exit normally. This is 114// typically called in a signal handler. 115// 116// Caller must provide client which is already configured to speak to the local 117// Consul agent, and with an ACL token that has `service:write` privileges for 118// the service specified. 119func NewService(serviceName string, client *api.Client) (*Service, error) { 120 return NewServiceWithConfig(serviceName, Config{Client: client}) 121} 122 123// NewServiceWithLogger starts the service with a specified log.Logger. 124func NewServiceWithLogger(serviceName string, client *api.Client, 125 logger hclog.Logger) (*Service, error) { 126 return NewServiceWithConfig(serviceName, Config{Client: client, Logger: logger}) 127} 128 129// NewDevServiceFromCertFiles creates a Service using certificate and key files 130// passed instead of fetching them from the client. 131func NewDevServiceFromCertFiles(serviceID string, logger hclog.Logger, 132 caFile, certFile, keyFile string) (*Service, error) { 133 134 tlsCfg, err := devTLSConfigFromFiles(caFile, certFile, keyFile) 135 if err != nil { 136 return nil, err 137 } 138 return NewDevServiceWithTLSConfig(serviceID, logger, tlsCfg) 139} 140 141// NewDevServiceWithTLSConfig creates a Service using static TLS config passed. 142// It's mostly useful for testing. 143func NewDevServiceWithTLSConfig(serviceName string, logger hclog.Logger, 144 tlsCfg *tls.Config) (*Service, error) { 145 s := &Service{ 146 service: serviceName, 147 logger: logger, 148 tlsCfg: newDynamicTLSConfig(tlsCfg, logger), 149 } 150 return s, nil 151} 152 153// Name returns the name of the service this object represents. Note it is the 154// service _name_ as used during discovery, not the ID used to uniquely identify 155// an instance of the service with an agent. 156func (s *Service) Name() string { 157 return s.service 158} 159 160// ServerTLSConfig returns a *tls.Config that allows any TCP listener to accept 161// and authorize incoming Connect clients. It will return a single static config 162// with hooks to dynamically load certificates, and perform Connect 163// authorization during verification. Service implementations do not need to 164// reload this to get new certificates. 165// 166// At any time it may be possible that the Service instance does not have access 167// to usable certificates due to not being initially setup yet or a prolonged 168// error during renewal. The listener will be able to accept connections again 169// once connectivity is restored provided the client's Token is valid. 170// 171// To prevent routing traffic to the app instance while it's certificates are 172// invalid or not populated yet you may use Ready in a health check endpoint 173// and/or ReadyWait during startup before starting the TLS listener. The latter 174// only prevents connections during initial bootstrap (including permission 175// issues where certs can never be issued due to bad credentials) but won't 176// handle the case that certificates expire and an error prevents timely 177// renewal. 178func (s *Service) ServerTLSConfig() *tls.Config { 179 return s.tlsCfg.Get(newServerSideVerifier(s.logger, s.client, s.service)) 180} 181 182// Dial connects to a remote Connect-enabled server. The passed Resolver is used 183// to discover a single candidate instance which will be dialed and have it's 184// TLS certificate verified against the expected identity. Failures are returned 185// directly with no retries. Repeated dials may use different instances 186// depending on the Resolver implementation. 187// 188// Timeout can be managed via the Context. 189// 190// Calls to Dial made before the Service has loaded certificates from the agent 191// will fail. You can prevent this by using Ready or ReadyWait in app during 192// startup. 193func (s *Service) Dial(ctx context.Context, resolver Resolver) (net.Conn, error) { 194 addr, certURI, err := resolver.Resolve(ctx) 195 if err != nil { 196 return nil, err 197 } 198 s.logger.Debug("resolved service instance", 199 "address", addr, 200 "identity", certURI.URI(), 201 ) 202 var dialer net.Dialer 203 tcpConn, err := dialer.DialContext(ctx, "tcp", addr) 204 if err != nil { 205 return nil, err 206 } 207 208 tlsConn := tls.Client(tcpConn, s.tlsCfg.Get(clientSideVerifier)) 209 // Set deadline for Handshake to complete. 210 deadline, ok := ctx.Deadline() 211 if ok { 212 tlsConn.SetDeadline(deadline) 213 } 214 // Perform handshake 215 if err = tlsConn.Handshake(); err != nil { 216 tlsConn.Close() 217 return nil, err 218 } 219 // Clear deadline since that was only for connection. Caller can set their own 220 // deadline later as necessary. 221 tlsConn.SetDeadline(time.Time{}) 222 223 // Verify that the connect server's URI matches certURI 224 err = verifyServerCertMatchesURI(tlsConn.ConnectionState().PeerCertificates, 225 certURI) 226 if err != nil { 227 tlsConn.Close() 228 return nil, err 229 } 230 s.logger.Debug("successfully connected to service instance", 231 "address", addr, 232 "identity", certURI.URI(), 233 ) 234 return tlsConn, nil 235} 236 237// HTTPDialTLS is compatible with http.Transport.DialTLS. It expects the addr 238// hostname to be specified using Consul DNS query syntax, e.g. 239// "web.service.consul". It converts that into the equivalent ConsulResolver and 240// then call s.Dial with the resolver. This is low level, clients should 241// typically use HTTPClient directly. 242func (s *Service) HTTPDialTLS(network, 243 addr string) (net.Conn, error) { 244 if s.httpResolverFromAddr == nil { 245 return nil, errors.New("no http resolver configured") 246 } 247 r, err := s.httpResolverFromAddr(addr) 248 if err != nil { 249 return nil, err 250 } 251 // TODO(banks): figure out how to do timeouts better. 252 return s.Dial(context.Background(), r) 253} 254 255// HTTPClient returns an *http.Client configured to dial remote Consul Connect 256// HTTP services. The client will return an error if attempting to make requests 257// to a non HTTPS hostname. It resolves the domain of the request with the same 258// syntax as Consul DNS queries although it performs discovery directly via the 259// API rather than just relying on Consul DNS. Hostnames that are not valid 260// Consul DNS queries will fail. 261func (s *Service) HTTPClient() *http.Client { 262 t := &http.Transport{ 263 // Sadly we can't use DialContext hook since that is expected to return a 264 // plain TCP connection and http.Client tries to start a TLS handshake over 265 // it. We need to control the handshake to be able to do our validation. 266 // So we have to use the older DialTLS which means no context/timeout 267 // support. 268 // 269 // TODO(banks): figure out how users can configure a timeout when using 270 // this and/or compatibility with http.Request.WithContext. 271 DialTLS: s.HTTPDialTLS, 272 } 273 // Need to manually re-enable http2 support since we set custom DialTLS. 274 // See https://golang.org/src/net/http/transport.go?s=8692:9036#L228 275 http2.ConfigureTransport(t) 276 return &http.Client{ 277 Transport: t, 278 } 279} 280 281// Close stops the service and frees resources. 282func (s *Service) Close() error { 283 if s.rootsWatch != nil { 284 s.rootsWatch.Stop() 285 } 286 if s.leafWatch != nil { 287 s.leafWatch.Stop() 288 } 289 return nil 290} 291 292func (s *Service) rootsWatchHandler(blockParam watch.BlockingParamVal, raw interface{}) { 293 if raw == nil { 294 return 295 } 296 v, ok := raw.(*api.CARootList) 297 if !ok || v == nil { 298 s.logger.Error("got invalid response from root watch") 299 return 300 } 301 302 // Got new root certificates, update the tls.Configs. 303 roots := x509.NewCertPool() 304 for _, root := range v.Roots { 305 roots.AppendCertsFromPEM([]byte(root.RootCertPEM)) 306 } 307 308 s.tlsCfg.SetRoots(roots) 309} 310 311func (s *Service) leafWatchHandler(blockParam watch.BlockingParamVal, raw interface{}) { 312 if raw == nil { 313 return // ignore 314 } 315 v, ok := raw.(*api.LeafCert) 316 if !ok || v == nil { 317 s.logger.Error("got invalid response from leaf watch") 318 return 319 } 320 321 // Got new leaf, update the tls.Configs 322 cert, err := tls.X509KeyPair([]byte(v.CertPEM), []byte(v.PrivateKeyPEM)) 323 if err != nil { 324 s.logger.Error("failed to parse new leaf cert", "error", err) 325 return 326 } 327 328 s.tlsCfg.SetLeaf(&cert) 329} 330 331// Ready returns whether or not both roots and a leaf certificate are 332// configured. If both are non-nil, they are assumed to be valid and usable. 333func (s *Service) Ready() bool { 334 return s.tlsCfg.Ready() 335} 336 337// ReadyWait returns a chan that is closed when the Service becomes ready 338// for use for the first time. Note that if the Service is ready when it is 339// called it returns a nil chan. Ready means that it has root and leaf 340// certificates configured which we assume are valid. The service may 341// subsequently stop being "ready" if it's certificates expire or are revoked 342// and an error prevents new ones being loaded but this method will not stop 343// returning a nil chan in that case. It is only useful for initial startup. For 344// ongoing health Ready() should be used. 345func (s *Service) ReadyWait() <-chan struct{} { 346 return s.tlsCfg.ReadyWait() 347} 348