1package taskrunner
2
3import (
4	"context"
5
6	hclog "github.com/hashicorp/go-hclog"
7	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
8	"github.com/hashicorp/nomad/nomad/structs"
9	"github.com/hashicorp/nomad/plugins/drivers"
10)
11
12var _ interfaces.TaskPrestartHook = (*remoteTaskHook)(nil)
13var _ interfaces.TaskPreKillHook = (*remoteTaskHook)(nil)
14
15// remoteTaskHook reattaches to remotely executing tasks.
16type remoteTaskHook struct {
17	tr *TaskRunner
18
19	logger hclog.Logger
20}
21
22func newRemoteTaskHook(tr *TaskRunner, logger hclog.Logger) interfaces.TaskHook {
23	h := &remoteTaskHook{
24		tr: tr,
25	}
26	h.logger = logger.Named(h.Name())
27	return h
28}
29
30func (h *remoteTaskHook) Name() string {
31	return "remote_task"
32}
33
34// Prestart performs 2 remote task driver related tasks:
35//   1. If there is no local handle, see if there is a handle propagated from a
36//      previous alloc to be restored.
37//   2. If the alloc is lost make sure the task signal is set to detach instead
38//      of kill.
39func (h *remoteTaskHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error {
40	if h.tr.getDriverHandle() != nil {
41		// Driver handle already exists so don't try to load remote
42		// task handle
43		return nil
44	}
45
46	h.tr.stateLock.Lock()
47	th := drivers.NewTaskHandleFromState(h.tr.state)
48	h.tr.stateLock.Unlock()
49
50	// Task handle will be nil if there was no previous allocation or if
51	// this is a destructive update
52	if th == nil {
53		resp.Done = true
54		return nil
55	}
56
57	// The task config is unique per invocation so recreate it here
58	th.Config = h.tr.buildTaskConfig()
59
60	if err := h.tr.driver.RecoverTask(th); err != nil {
61		// Soft error here to let a new instance get started instead of
62		// failing the task since retrying is unlikely to help.
63		h.logger.Error("error recovering task state", "error", err)
64		return nil
65	}
66
67	taskInfo, err := h.tr.driver.InspectTask(th.Config.ID)
68	if err != nil {
69		// Soft error here to let a new instance get started instead of
70		// failing the task since retrying is unlikely to help.
71		h.logger.Error("error inspecting recovered task state", "error", err)
72		return nil
73	}
74
75	h.tr.setDriverHandle(NewDriverHandle(h.tr.driver, th.Config.ID, h.tr.Task(), taskInfo.NetworkOverride))
76
77	h.tr.stateLock.Lock()
78	h.tr.localState.TaskHandle = th
79	h.tr.localState.DriverNetwork = taskInfo.NetworkOverride
80	h.tr.stateLock.Unlock()
81
82	// Ensure the signal is set according to the allocation's state
83	h.setSignal(h.tr.Alloc())
84
85	// Emit TaskStarted manually since the normal task runner logic will
86	// treat this task like a restored task and skip emitting started.
87	h.tr.UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted))
88
89	return nil
90}
91
92// PreKilling tells the remote task driver to detach a remote task instead of
93// stopping it.
94func (h *remoteTaskHook) PreKilling(ctx context.Context, req *interfaces.TaskPreKillRequest, resp *interfaces.TaskPreKillResponse) error {
95	alloc := h.tr.Alloc()
96	h.setSignal(alloc)
97	return nil
98}
99
100// setSignal to detach if the allocation is lost or draining. Safe to call
101// multiple times as it only transitions to using detach -- never back to kill.
102func (h *remoteTaskHook) setSignal(alloc *structs.Allocation) {
103	driverHandle := h.tr.getDriverHandle()
104	if driverHandle == nil {
105		// Nothing to do exit early
106		return
107	}
108
109	switch {
110	case alloc.ClientStatus == structs.AllocClientStatusLost:
111		// Continue on; lost allocs should just detach
112		h.logger.Debug("detaching from remote task since alloc was lost")
113	case alloc.DesiredTransition.ShouldMigrate():
114		// Continue on; migrating allocs should just detach
115		h.logger.Debug("detaching from remote task since alloc was drained")
116	default:
117		// Nothing to do exit early
118		return
119	}
120
121	// Set DetachSignal to indicate to the remote task driver that it
122	// should detach this remote task and ignore it.
123	driverHandle.SetKillSignal(drivers.DetachSignal)
124}
125