1package taskrunner
2
3import (
4	"context"
5	"fmt"
6	"sync/atomic"
7	"testing"
8	"time"
9
10	"github.com/hashicorp/consul/api"
11	hclog "github.com/hashicorp/go-hclog"
12	"github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"
13	"github.com/hashicorp/nomad/client/consul"
14	"github.com/hashicorp/nomad/client/taskenv"
15	agentconsul "github.com/hashicorp/nomad/command/agent/consul"
16	"github.com/hashicorp/nomad/helper/testlog"
17	"github.com/hashicorp/nomad/nomad/mock"
18	"github.com/hashicorp/nomad/nomad/structs"
19	"github.com/stretchr/testify/require"
20)
21
22func newScriptMock(hb TTLUpdater, exec interfaces.ScriptExecutor, logger hclog.Logger, interval, timeout time.Duration) *scriptCheck {
23	script := newScriptCheck(&scriptCheckConfig{
24		allocID:   "allocid",
25		taskName:  "testtask",
26		serviceID: "serviceid",
27		check: &structs.ServiceCheck{
28			Interval: interval,
29			Timeout:  timeout,
30		},
31		ttlUpdater: hb,
32		driverExec: exec,
33		taskEnv:    &taskenv.TaskEnv{},
34		logger:     logger,
35		shutdownCh: nil,
36	})
37	script.callback = newScriptCheckCallback(script)
38	script.lastCheckOk = true
39	return script
40}
41
// heartbeat captures the arguments of a single UpdateTTL call as seen by
// fakeHeartbeater. The namespace argument is not recorded.
type heartbeat struct {
	checkID string
	output  string
	status  string
}

// fakeHeartbeater implements the TTLUpdater interface to allow mocking out
// Consul in script executor tests. Every UpdateTTL call is forwarded to the
// heartbeats channel for the test to inspect.
type fakeHeartbeater struct {
	heartbeats chan heartbeat
}

// newFakeHeartbeater returns a fakeHeartbeater backed by an unbuffered
// channel, so each UpdateTTL call blocks until the test receives it.
func newFakeHeartbeater() *fakeHeartbeater {
	ch := make(chan heartbeat)
	return &fakeHeartbeater{heartbeats: ch}
}

// UpdateTTL publishes the check ID, output and status on the heartbeats
// channel and always returns nil. The namespace is ignored.
func (f *fakeHeartbeater) UpdateTTL(checkID, namespace, output, status string) error {
	beat := heartbeat{
		checkID: checkID,
		output:  output,
		status:  status,
	}
	f.heartbeats <- beat
	return nil
}
62
63// TestScript_Exec_Cancel asserts cancelling a script check shortcircuits
64// any running scripts.
65func TestScript_Exec_Cancel(t *testing.T) {
66	exec, cancel := newBlockingScriptExec()
67	defer cancel()
68
69	logger := testlog.HCLogger(t)
70	script := newScriptMock(nil, // TTLUpdater should never be called
71		exec, logger, time.Hour, time.Hour)
72
73	handle := script.run()
74	<-exec.running  // wait until Exec is called
75	handle.cancel() // cancel now that we're blocked in exec
76
77	select {
78	case <-handle.wait():
79	case <-time.After(3 * time.Second):
80		t.Fatalf("timed out waiting for script check to exit")
81	}
82
83	// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
84	// canceled. Only a wrapper around it obeys the context cancelation.
85	require.NotEqual(t, atomic.LoadInt32(&exec.exited), 1,
86		"expected script executor to still be running after timeout")
87}
88
89// TestScript_Exec_TimeoutBasic asserts a script will be killed when the
90// timeout is reached.
91func TestScript_Exec_TimeoutBasic(t *testing.T) {
92	t.Parallel()
93	exec, cancel := newBlockingScriptExec()
94	defer cancel()
95
96	logger := testlog.HCLogger(t)
97	hb := newFakeHeartbeater()
98	script := newScriptMock(hb, exec, logger, time.Hour, time.Second)
99
100	handle := script.run()
101	defer handle.cancel() // cleanup
102	<-exec.running        // wait until Exec is called
103
104	// Check for UpdateTTL call
105	select {
106	case update := <-hb.heartbeats:
107		require.Equal(t, update.output, context.DeadlineExceeded.Error())
108		require.Equal(t, update.status, api.HealthCritical)
109	case <-time.After(3 * time.Second):
110		t.Fatalf("timed out waiting for script check to exit")
111	}
112
113	// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
114	// canceled. Only a wrapper around it obeys the context cancelation.
115	require.NotEqual(t, atomic.LoadInt32(&exec.exited), 1,
116		"expected script executor to still be running after timeout")
117
118	// Cancel and watch for exit
119	handle.cancel()
120	select {
121	case <-handle.wait(): // ok!
122	case update := <-hb.heartbeats:
123		t.Errorf("unexpected UpdateTTL call on exit with status=%q", update)
124	case <-time.After(3 * time.Second):
125		t.Fatalf("timed out waiting for script check to exit")
126	}
127}
128
129// TestScript_Exec_TimeoutCritical asserts a script will be killed when
130// the timeout is reached and always set a critical status regardless of what
131// Exec returns.
132func TestScript_Exec_TimeoutCritical(t *testing.T) {
133	t.Parallel()
134	logger := testlog.HCLogger(t)
135	hb := newFakeHeartbeater()
136	script := newScriptMock(hb, sleeperExec{}, logger, time.Hour, time.Nanosecond)
137
138	handle := script.run()
139	defer handle.cancel() // cleanup
140
141	// Check for UpdateTTL call
142	select {
143	case update := <-hb.heartbeats:
144		require.Equal(t, update.output, context.DeadlineExceeded.Error())
145		require.Equal(t, update.status, api.HealthCritical)
146	case <-time.After(3 * time.Second):
147		t.Fatalf("timed out waiting for script check to timeout")
148	}
149}
150
151// TestScript_Exec_Shutdown asserts a script will be executed once more
152// when told to shutdown.
153func TestScript_Exec_Shutdown(t *testing.T) {
154	shutdown := make(chan struct{})
155	exec := newSimpleExec(0, nil)
156	logger := testlog.HCLogger(t)
157	hb := newFakeHeartbeater()
158	script := newScriptMock(hb, exec, logger, time.Hour, 3*time.Second)
159	script.shutdownCh = shutdown
160
161	handle := script.run()
162	defer handle.cancel() // cleanup
163	close(shutdown)       // tell scriptCheck to exit
164
165	select {
166	case update := <-hb.heartbeats:
167		require.Equal(t, update.output, "code=0 err=<nil>")
168		require.Equal(t, update.status, api.HealthPassing)
169	case <-time.After(3 * time.Second):
170		t.Fatalf("timed out waiting for script check to exit")
171	}
172
173	select {
174	case <-handle.wait(): // ok!
175	case <-time.After(3 * time.Second):
176		t.Fatalf("timed out waiting for script check to exit")
177	}
178}
179
180// TestScript_Exec_Codes asserts script exit codes are translated to their
181// corresponding Consul health check status.
182func TestScript_Exec_Codes(t *testing.T) {
183
184	exec := newScriptedExec([]execResult{
185		{[]byte("output"), 1, nil},
186		{[]byte("output"), 0, nil},
187		{[]byte("output"), 0, context.DeadlineExceeded},
188		{[]byte("output"), 0, nil},
189		{[]byte("<ignored output>"), 2, fmt.Errorf("some error")},
190		{[]byte("output"), 0, nil},
191		{[]byte("error9000"), 9000, nil},
192	})
193	logger := testlog.HCLogger(t)
194	hb := newFakeHeartbeater()
195	script := newScriptMock(
196		hb, exec, logger, time.Nanosecond, 3*time.Second)
197
198	handle := script.run()
199	defer handle.cancel() // cleanup
200	deadline := time.After(3 * time.Second)
201
202	expected := []heartbeat{
203		{script.id, "output", api.HealthWarning},
204		{script.id, "output", api.HealthPassing},
205		{script.id, context.DeadlineExceeded.Error(), api.HealthCritical},
206		{script.id, "output", api.HealthPassing},
207		{script.id, "some error", api.HealthCritical},
208		{script.id, "output", api.HealthPassing},
209		{script.id, "error9000", api.HealthCritical},
210	}
211
212	for i := 0; i <= 6; i++ {
213		select {
214		case update := <-hb.heartbeats:
215			require.Equal(t, update, expected[i],
216				"expected update %d to be '%s' but received '%s'",
217				i, expected[i], update)
218		case <-deadline:
219			t.Fatalf("timed out waiting for all script checks to finish")
220		}
221	}
222}
223
224// TestScript_TaskEnvInterpolation asserts that script check hooks are
225// interpolated in the same way that services are
226func TestScript_TaskEnvInterpolation(t *testing.T) {
227
228	logger := testlog.HCLogger(t)
229	consulClient := consul.NewMockConsulServiceClient(t, logger)
230	exec, cancel := newBlockingScriptExec()
231	defer cancel()
232
233	alloc := mock.ConnectAlloc()
234	task := alloc.Job.TaskGroups[0].Tasks[0]
235
236	task.Services[0].Name = "${NOMAD_JOB_NAME}-${TASK}-${SVC_NAME}"
237	task.Services[0].Checks[0].Name = "${NOMAD_JOB_NAME}-${SVC_NAME}-check"
238	alloc.Job.Canonicalize() // need to re-canonicalize b/c the mock already did it
239
240	env := taskenv.NewBuilder(mock.Node(), alloc, task, "global").SetHookEnv(
241		"script_check",
242		map[string]string{"SVC_NAME": "frontend"}).Build()
243
244	svcHook := newServiceHook(serviceHookConfig{
245		alloc:          alloc,
246		task:           task,
247		consulServices: consulClient,
248		logger:         logger,
249	})
250	// emulate prestart having been fired
251	svcHook.taskEnv = env
252
253	scHook := newScriptCheckHook(scriptCheckHookConfig{
254		alloc:        alloc,
255		task:         task,
256		consul:       consulClient,
257		logger:       logger,
258		shutdownWait: time.Hour, // TTLUpdater will never be called
259	})
260	// emulate prestart having been fired
261	scHook.taskEnv = env
262	scHook.driverExec = exec
263
264	expectedSvc := svcHook.getWorkloadServices().Services[0]
265	expected := agentconsul.MakeCheckID(agentconsul.MakeAllocServiceID(
266		alloc.ID, task.Name, expectedSvc), expectedSvc.Checks[0])
267
268	actual := scHook.newScriptChecks()
269	check, ok := actual[expected]
270	require.True(t, ok)
271	require.Equal(t, "my-job-frontend-check", check.check.Name)
272
273	// emulate an update
274	env = taskenv.NewBuilder(mock.Node(), alloc, task, "global").SetHookEnv(
275		"script_check",
276		map[string]string{"SVC_NAME": "backend"}).Build()
277	scHook.taskEnv = env
278	svcHook.taskEnv = env
279
280	expectedSvc = svcHook.getWorkloadServices().Services[0]
281	expected = agentconsul.MakeCheckID(agentconsul.MakeAllocServiceID(
282		alloc.ID, task.Name, expectedSvc), expectedSvc.Checks[0])
283
284	actual = scHook.newScriptChecks()
285	check, ok = actual[expected]
286	require.True(t, ok)
287	require.Equal(t, "my-job-backend-check", check.check.Name)
288}
289
290func TestScript_associated(t *testing.T) {
291	t.Run("neither set", func(t *testing.T) {
292		require.False(t, new(scriptCheckHook).associated("task1", "", ""))
293	})
294
295	t.Run("service set", func(t *testing.T) {
296		require.True(t, new(scriptCheckHook).associated("task1", "task1", ""))
297		require.False(t, new(scriptCheckHook).associated("task1", "task2", ""))
298	})
299
300	t.Run("check set", func(t *testing.T) {
301		require.True(t, new(scriptCheckHook).associated("task1", "", "task1"))
302		require.False(t, new(scriptCheckHook).associated("task1", "", "task2"))
303	})
304
305	t.Run("both set", func(t *testing.T) {
306		// ensure check.task takes precedence over service.task
307		require.True(t, new(scriptCheckHook).associated("task1", "task1", "task1"))
308		require.False(t, new(scriptCheckHook).associated("task1", "task1", "task2"))
309		require.True(t, new(scriptCheckHook).associated("task1", "task2", "task1"))
310		require.False(t, new(scriptCheckHook).associated("task1", "task2", "task2"))
311	})
312}
313