1package autoconf
2
3import (
4	"context"
5	"crypto/x509"
6	"crypto/x509/pkix"
7	"encoding/asn1"
8	"fmt"
9	"net"
10	"net/url"
11	"testing"
12	"time"
13
14	"github.com/stretchr/testify/mock"
15	"github.com/stretchr/testify/require"
16
17	"github.com/hashicorp/consul/agent/cache"
18	cachetype "github.com/hashicorp/consul/agent/cache-types"
19	"github.com/hashicorp/consul/agent/config"
20	"github.com/hashicorp/consul/agent/connect"
21	"github.com/hashicorp/consul/agent/metadata"
22	"github.com/hashicorp/consul/agent/structs"
23	"github.com/hashicorp/consul/lib/retry"
24	"github.com/hashicorp/consul/sdk/testutil"
25)
26
27func TestAutoEncrypt_generateCSR(t *testing.T) {
28	type testCase struct {
29		conf *config.RuntimeConfig
30
31		// to validate the csr
32		expectedSubject  pkix.Name
33		expectedSigAlg   x509.SignatureAlgorithm
34		expectedPubAlg   x509.PublicKeyAlgorithm
35		expectedDNSNames []string
36		expectedIPs      []net.IP
37		expectedURIs     []*url.URL
38	}
39
40	cases := map[string]testCase{
41		"ip-sans": {
42			conf: &config.RuntimeConfig{
43				Datacenter:       "dc1",
44				NodeName:         "test-node",
45				AutoEncryptTLS:   true,
46				AutoEncryptIPSAN: []net.IP{net.IPv4(198, 18, 0, 1), net.IPv4(198, 18, 0, 2)},
47			},
48			expectedSubject: pkix.Name{
49				CommonName: connect.AgentCN("test-node", unknownTrustDomain),
50				Names: []pkix.AttributeTypeAndValue{
51					{
52						// 2,5,4,3 is the CommonName type ASN1 identifier
53						Type:  asn1.ObjectIdentifier{2, 5, 4, 3},
54						Value: "testnode.agnt.unknown.consul",
55					},
56				},
57			},
58			expectedSigAlg:   x509.ECDSAWithSHA256,
59			expectedPubAlg:   x509.ECDSA,
60			expectedDNSNames: defaultDNSSANs,
61			expectedIPs: append(defaultIPSANs,
62				net.IP{198, 18, 0, 1},
63				net.IP{198, 18, 0, 2},
64			),
65			expectedURIs: []*url.URL{
66				{
67					Scheme: "spiffe",
68					Host:   unknownTrustDomain,
69					Path:   "/agent/client/dc/dc1/id/test-node",
70				},
71			},
72		},
73		"dns-sans": {
74			conf: &config.RuntimeConfig{
75				Datacenter:        "dc1",
76				NodeName:          "test-node",
77				AutoEncryptTLS:    true,
78				AutoEncryptDNSSAN: []string{"foo.local", "bar.local"},
79			},
80			expectedSubject: pkix.Name{
81				CommonName: connect.AgentCN("test-node", unknownTrustDomain),
82				Names: []pkix.AttributeTypeAndValue{
83					{
84						// 2,5,4,3 is the CommonName type ASN1 identifier
85						Type:  asn1.ObjectIdentifier{2, 5, 4, 3},
86						Value: "testnode.agnt.unknown.consul",
87					},
88				},
89			},
90			expectedSigAlg:   x509.ECDSAWithSHA256,
91			expectedPubAlg:   x509.ECDSA,
92			expectedDNSNames: append(defaultDNSSANs, "foo.local", "bar.local"),
93			expectedIPs:      defaultIPSANs,
94			expectedURIs: []*url.URL{
95				{
96					Scheme: "spiffe",
97					Host:   unknownTrustDomain,
98					Path:   "/agent/client/dc/dc1/id/test-node",
99				},
100			},
101		},
102	}
103
104	for name, tcase := range cases {
105		t.Run(name, func(t *testing.T) {
106			ac := AutoConfig{config: tcase.conf}
107
108			csr, _, err := ac.generateCSR()
109			require.NoError(t, err)
110
111			request, err := connect.ParseCSR(csr)
112			require.NoError(t, err)
113			require.NotNil(t, request)
114
115			require.Equal(t, tcase.expectedSubject, request.Subject)
116			require.Equal(t, tcase.expectedSigAlg, request.SignatureAlgorithm)
117			require.Equal(t, tcase.expectedPubAlg, request.PublicKeyAlgorithm)
118			require.Equal(t, tcase.expectedDNSNames, request.DNSNames)
119			require.Equal(t, tcase.expectedIPs, request.IPAddresses)
120			require.Equal(t, tcase.expectedURIs, request.URIs)
121		})
122	}
123}
124
125func TestAutoEncrypt_hosts(t *testing.T) {
126	type testCase struct {
127		serverProvider ServerProvider
128		config         *config.RuntimeConfig
129
130		hosts []string
131		err   string
132	}
133
134	providerNone := newMockServerProvider(t)
135	providerNone.On("FindLANServer").Return(nil).Times(0)
136
137	providerWithServer := newMockServerProvider(t)
138	providerWithServer.On("FindLANServer").Return(&metadata.Server{Addr: &net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 1234}}).Times(0)
139
140	cases := map[string]testCase{
141		"router-override": {
142			serverProvider: providerWithServer,
143			config: &config.RuntimeConfig{
144				RetryJoinLAN:      []string{"127.0.0.1:9876"},
145				StartJoinAddrsLAN: []string{"192.168.1.2:4321"},
146			},
147			hosts: []string{"198.18.0.1:1234"},
148		},
149		"various-addresses": {
150			serverProvider: providerNone,
151			config: &config.RuntimeConfig{
152				RetryJoinLAN:      []string{"198.18.0.1", "foo.com", "[2001:db8::1234]:1234", "abc.local:9876"},
153				StartJoinAddrsLAN: []string{"192.168.1.1:5432", "start.local", "[::ffff:172.16.5.4]", "main.dev:6789"},
154			},
155			hosts: []string{
156				"192.168.1.1",
157				"start.local",
158				"[::ffff:172.16.5.4]",
159				"main.dev",
160				"198.18.0.1",
161				"foo.com",
162				"2001:db8::1234",
163				"abc.local",
164			},
165		},
166		"split-host-port-error": {
167			serverProvider: providerNone,
168			config: &config.RuntimeConfig{
169				StartJoinAddrsLAN: []string{"this-is-not:a:ip:and_port"},
170			},
171			err: "no auto-encrypt server addresses available for use",
172		},
173	}
174
175	for name, tcase := range cases {
176		t.Run(name, func(t *testing.T) {
177			ac := AutoConfig{
178				config: tcase.config,
179				logger: testutil.Logger(t),
180				acConfig: Config{
181					ServerProvider: tcase.serverProvider,
182				},
183			}
184
185			hosts, err := ac.joinHosts()
186			if tcase.err != "" {
187				testutil.RequireErrorContains(t, err, tcase.err)
188			} else {
189				require.NoError(t, err)
190				require.Equal(t, tcase.hosts, hosts)
191			}
192		})
193	}
194}
195
196func TestAutoEncrypt_InitialCerts(t *testing.T) {
197	token := "1a148388-3dd7-4db4-9eea-520424b4a86a"
198	datacenter := "foo"
199	nodeName := "bar"
200
201	mcfg := newMockedConfig(t)
202
203	_, indexedRoots, cert := testCerts(t, nodeName, datacenter)
204
205	// The following are called once for each round through the auto-encrypt initial certs outer loop
206	// (not the per-host direct rpc attempts but the one involving the RetryWaiter)
207	mcfg.tokens.On("AgentToken").Return(token).Times(2)
208	mcfg.serverProvider.On("FindLANServer").Return(nil).Times(2)
209
210	request := structs.CASignRequest{
211		WriteRequest: structs.WriteRequest{Token: token},
212		Datacenter:   datacenter,
213		// this gets removed by the mock code as its non-deterministic what it will be
214		CSR: "",
215	}
216
217	// first failure
218	mcfg.directRPC.On("RPC",
219		datacenter,
220		nodeName,
221		&net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300},
222		"AutoEncrypt.Sign",
223		&request,
224		&structs.SignedResponse{},
225	).Once().Return(fmt.Errorf("injected error"))
226	// second failure
227	mcfg.directRPC.On("RPC",
228		datacenter,
229		nodeName,
230		&net.TCPAddr{IP: net.IPv4(198, 18, 0, 2), Port: 8300},
231		"AutoEncrypt.Sign",
232		&request,
233		&structs.SignedResponse{},
234	).Once().Return(fmt.Errorf("injected error"))
235	// third times is successfuly (second attempt to first server)
236	mcfg.directRPC.On("RPC",
237		datacenter,
238		nodeName,
239		&net.TCPAddr{IP: net.IPv4(198, 18, 0, 1), Port: 8300},
240		"AutoEncrypt.Sign",
241		&request,
242		&structs.SignedResponse{},
243	).Once().Return(nil).Run(func(args mock.Arguments) {
244		resp, ok := args.Get(5).(*structs.SignedResponse)
245		require.True(t, ok)
246		resp.ConnectCARoots = *indexedRoots
247		resp.IssuedCert = *cert
248		resp.VerifyServerHostname = true
249	})
250
251	mcfg.Config.Waiter = &retry.Waiter{MinFailures: 2, MaxWait: time.Millisecond}
252
253	ac := AutoConfig{
254		config: &config.RuntimeConfig{
255			Datacenter:   datacenter,
256			NodeName:     nodeName,
257			RetryJoinLAN: []string{"198.18.0.1:1234", "198.18.0.2:3456"},
258			ServerPort:   8300,
259		},
260		acConfig: mcfg.Config,
261		logger:   testutil.Logger(t),
262	}
263
264	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
265	defer cancel()
266	resp, err := ac.autoEncryptInitialCerts(ctx)
267	require.NoError(t, err)
268	require.NotNil(t, resp)
269	require.True(t, resp.VerifyServerHostname)
270	require.NotEmpty(t, resp.IssuedCert.PrivateKeyPEM)
271	resp.IssuedCert.PrivateKeyPEM = ""
272	cert.PrivateKeyPEM = ""
273	require.Equal(t, cert, &resp.IssuedCert)
274	require.Equal(t, indexedRoots, &resp.ConnectCARoots)
275	require.Empty(t, resp.ManualCARoots)
276}
277
278func TestAutoEncrypt_InitialConfiguration(t *testing.T) {
279	token := "010494ae-ee45-4433-903c-a58c91297714"
280	nodeName := "auto-encrypt"
281	datacenter := "dc1"
282
283	mcfg := newMockedConfig(t)
284	loader := setupRuntimeConfig(t)
285	loader.addConfigHCL(`
286		auto_encrypt {
287			tls = true
288		}
289	`)
290	loader.opts.FlagValues.NodeName = &nodeName
291	mcfg.Config.Loader = loader.Load
292
293	indexedRoots, cert, extraCerts := mcfg.setupInitialTLS(t, nodeName, datacenter, token)
294
295	// prepopulation is going to grab the token to populate the correct cache key
296	mcfg.tokens.On("AgentToken").Return(token).Times(0)
297
298	// no server provider
299	mcfg.serverProvider.On("FindLANServer").Return(&metadata.Server{Addr: &net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300}}).Times(1)
300
301	populateResponse := func(args mock.Arguments) {
302		resp, ok := args.Get(5).(*structs.SignedResponse)
303		require.True(t, ok)
304		*resp = structs.SignedResponse{
305			VerifyServerHostname: true,
306			ConnectCARoots:       *indexedRoots,
307			IssuedCert:           *cert,
308			ManualCARoots:        extraCerts,
309		}
310	}
311
312	expectedRequest := structs.CASignRequest{
313		WriteRequest: structs.WriteRequest{Token: token},
314		Datacenter:   datacenter,
315		// TODO (autoconf) Maybe in the future we should populate a CSR
316		// and do some manual parsing/verification of the contents. The
317		// bits not having to do with the signing key such as the requested
318		// SANs and CN. For now though the mockDirectRPC type will empty
319		// the CSR so we have to pass in an empty string to the expectation.
320		CSR: "",
321	}
322
323	mcfg.directRPC.On(
324		"RPC",
325		datacenter,
326		nodeName,
327		&net.TCPAddr{IP: net.IPv4(127, 0, 0, 1), Port: 8300},
328		"AutoEncrypt.Sign",
329		&expectedRequest,
330		&structs.SignedResponse{}).Return(nil).Run(populateResponse)
331
332	ac, err := New(mcfg.Config)
333	require.NoError(t, err)
334	require.NotNil(t, ac)
335
336	cfg, err := ac.InitialConfiguration(context.Background())
337	require.NoError(t, err)
338	require.NotNil(t, cfg)
339
340}
341
342func TestAutoEncrypt_TokenUpdate(t *testing.T) {
343	testAC := startedAutoConfig(t, true)
344
345	newToken := "1a4cc445-86ed-46b4-a355-bbf5a11dddb0"
346
347	rootsCtx, rootsCancel := context.WithCancel(context.Background())
348	testAC.mcfg.cache.On("Notify",
349		mock.Anything,
350		cachetype.ConnectCARootName,
351		&structs.DCSpecificRequest{Datacenter: testAC.ac.config.Datacenter},
352		rootsWatchID,
353		mock.Anything,
354	).Return(nil).Once().Run(func(args mock.Arguments) {
355		rootsCancel()
356	})
357
358	leafCtx, leafCancel := context.WithCancel(context.Background())
359	testAC.mcfg.cache.On("Notify",
360		mock.Anything,
361		cachetype.ConnectCALeafName,
362		&cachetype.ConnectCALeafRequest{
363			Datacenter: "dc1",
364			Agent:      "autoconf",
365			Token:      newToken,
366			DNSSAN:     defaultDNSSANs,
367			IPSAN:      defaultIPSANs,
368		},
369		leafWatchID,
370		mock.Anything,
371	).Return(nil).Once().Run(func(args mock.Arguments) {
372		leafCancel()
373	})
374
375	// this will be retrieved once when resetting the leaf cert watch
376	testAC.mcfg.tokens.On("AgentToken").Return(newToken).Once()
377
378	// send the notification about the token update
379	testAC.tokenUpdates <- struct{}{}
380
381	// wait for the leaf cert watches
382	require.True(t, waitForChans(100*time.Millisecond, leafCtx.Done(), rootsCtx.Done()), "New cache watches were not started within 100ms")
383}
384
385func TestAutoEncrypt_RootsUpdate(t *testing.T) {
386	testAC := startedAutoConfig(t, true)
387
388	secondCA := connect.TestCA(t, testAC.initialRoots.Roots[0])
389	secondRoots := structs.IndexedCARoots{
390		ActiveRootID: secondCA.ID,
391		TrustDomain:  connect.TestClusterID,
392		Roots: []*structs.CARoot{
393			secondCA,
394			testAC.initialRoots.Roots[0],
395		},
396		QueryMeta: structs.QueryMeta{
397			Index: 99,
398		},
399	}
400
401	updatedCtx, cancel := context.WithCancel(context.Background())
402	testAC.mcfg.tlsCfg.On("UpdateAutoTLSCA",
403		[]string{secondCA.RootCert, testAC.initialRoots.Roots[0].RootCert},
404	).Return(nil).Once().Run(func(args mock.Arguments) {
405		cancel()
406	})
407
408	// when a cache event comes in we end up recalculating the fallback timer which requires this call
409	testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(time.Now().Add(10 * time.Minute)).Once()
410
411	req := structs.DCSpecificRequest{Datacenter: "dc1"}
412	require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{
413		CorrelationID: rootsWatchID,
414		Result:        &secondRoots,
415		Meta: cache.ResultMeta{
416			Index: secondRoots.Index,
417		},
418	}))
419
420	require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time")
421}
422
423func TestAutoEncrypt_CertUpdate(t *testing.T) {
424	testAC := startedAutoConfig(t, true)
425	secondCert := newLeaf(t, "autoconf", "dc1", testAC.initialRoots.Roots[0], 99, 10*time.Minute)
426
427	updatedCtx, cancel := context.WithCancel(context.Background())
428	testAC.mcfg.tlsCfg.On("UpdateAutoTLSCert",
429		secondCert.CertPEM,
430		"redacted",
431	).Return(nil).Once().Run(func(args mock.Arguments) {
432		cancel()
433	})
434
435	// when a cache event comes in we end up recalculating the fallback timer which requires this call
436	testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(secondCert.ValidBefore).Once()
437
438	req := cachetype.ConnectCALeafRequest{
439		Datacenter: "dc1",
440		Agent:      "autoconf",
441		Token:      testAC.originalToken,
442		DNSSAN:     defaultDNSSANs,
443		IPSAN:      defaultIPSANs,
444	}
445	require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{
446		CorrelationID: leafWatchID,
447		Result:        secondCert,
448		Meta: cache.ResultMeta{
449			Index: secondCert.ModifyIndex,
450		},
451	}))
452
453	require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time")
454}
455
456func TestAutoEncrypt_Fallback(t *testing.T) {
457	testAC := startedAutoConfig(t, true)
458
459	// at this point everything is operating normally and we are just
460	// waiting for events. We are going to send a new cert that is basically
461	// already expired and then allow the fallback routine to kick in.
462	secondCert := newLeaf(t, "autoconf", "dc1", testAC.initialRoots.Roots[0], 100, time.Nanosecond)
463	secondCA := connect.TestCA(t, testAC.initialRoots.Roots[0])
464	secondRoots := structs.IndexedCARoots{
465		ActiveRootID: secondCA.ID,
466		TrustDomain:  connect.TestClusterID,
467		Roots: []*structs.CARoot{
468			secondCA,
469			testAC.initialRoots.Roots[0],
470		},
471		QueryMeta: structs.QueryMeta{
472			Index: 101,
473		},
474	}
475	thirdCert := newLeaf(t, "autoconf", "dc1", secondCA, 102, 10*time.Minute)
476
477	// setup the expectation for when the certs get updated initially
478	updatedCtx, updateCancel := context.WithCancel(context.Background())
479	testAC.mcfg.tlsCfg.On("UpdateAutoTLSCert",
480		secondCert.CertPEM,
481		"redacted",
482	).Return(nil).Once().Run(func(args mock.Arguments) {
483		updateCancel()
484	})
485
486	// when a cache event comes in we end up recalculating the fallback timer which requires this call
487	testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(secondCert.ValidBefore).Once()
488	testAC.mcfg.tlsCfg.On("AutoEncryptCertExpired").Return(true).Once()
489
490	fallbackCtx, fallbackCancel := context.WithCancel(context.Background())
491
492	// also testing here that we can change server IPs for ongoing operations
493	testAC.mcfg.serverProvider.On("FindLANServer").Once().Return(&metadata.Server{
494		Addr: &net.TCPAddr{IP: net.IPv4(198, 18, 23, 2), Port: 8300},
495	})
496
497	// after sending the notification for the cert update another InitialConfiguration RPC
498	// will be made to pull down the latest configuration. So we need to set up the response
499	// for the second RPC
500	populateResponse := func(args mock.Arguments) {
501		resp, ok := args.Get(5).(*structs.SignedResponse)
502		require.True(t, ok)
503		*resp = structs.SignedResponse{
504			VerifyServerHostname: true,
505			ConnectCARoots:       secondRoots,
506			IssuedCert:           *thirdCert,
507			ManualCARoots:        testAC.extraCerts,
508		}
509
510		fallbackCancel()
511	}
512
513	expectedRequest := structs.CASignRequest{
514		WriteRequest: structs.WriteRequest{Token: testAC.originalToken},
515		Datacenter:   "dc1",
516		// TODO (autoconf) Maybe in the future we should populate a CSR
517		// and do some manual parsing/verification of the contents. The
518		// bits not having to do with the signing key such as the requested
519		// SANs and CN. For now though the mockDirectRPC type will empty
520		// the CSR so we have to pass in an empty string to the expectation.
521		CSR: "",
522	}
523
524	// the fallback routine to perform auto-encrypt again will need to grab this
525	testAC.mcfg.tokens.On("AgentToken").Return(testAC.originalToken).Once()
526
527	testAC.mcfg.directRPC.On(
528		"RPC",
529		"dc1",
530		"autoconf",
531		&net.TCPAddr{IP: net.IPv4(198, 18, 23, 2), Port: 8300},
532		"AutoEncrypt.Sign",
533		&expectedRequest,
534		&structs.SignedResponse{}).Return(nil).Run(populateResponse).Once()
535
536	testAC.mcfg.expectInitialTLS(t, "autoconf", "dc1", testAC.originalToken, secondCA, &secondRoots, thirdCert, testAC.extraCerts)
537
538	// after the second RPC we now will use the new certs validity period in the next run loop iteration
539	testAC.mcfg.tlsCfg.On("AutoEncryptCertNotAfter").Return(time.Now().Add(10 * time.Minute)).Once()
540
541	// now that all the mocks are set up we can trigger the whole thing by sending the second expired cert
542	// as a cache update event.
543	req := cachetype.ConnectCALeafRequest{
544		Datacenter: "dc1",
545		Agent:      "autoconf",
546		Token:      testAC.originalToken,
547		DNSSAN:     defaultDNSSANs,
548		IPSAN:      defaultIPSANs,
549	}
550	require.True(t, testAC.mcfg.cache.sendNotification(context.Background(), req.CacheInfo().Key, cache.UpdateEvent{
551		CorrelationID: leafWatchID,
552		Result:        secondCert,
553		Meta: cache.ResultMeta{
554			Index: secondCert.ModifyIndex,
555		},
556	}))
557
558	// wait for the TLS certificates to get updated
559	require.True(t, waitForChans(100*time.Millisecond, updatedCtx.Done()), "TLS certificates were not updated within the alotted time")
560
561	// now wait for the fallback routine to be invoked
562	require.True(t, waitForChans(100*time.Millisecond, fallbackCtx.Done()), "fallback routines did not get invoked within the alotted time")
563}
564