1package taskrunner 2 3import ( 4 "context" 5 6 hclog "github.com/hashicorp/go-hclog" 7 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 8 "github.com/hashicorp/nomad/nomad/structs" 9 "github.com/hashicorp/nomad/plugins/drivers" 10) 11 12var _ interfaces.TaskPrestartHook = (*remoteTaskHook)(nil) 13var _ interfaces.TaskPreKillHook = (*remoteTaskHook)(nil) 14 15// remoteTaskHook reattaches to remotely executing tasks. 16type remoteTaskHook struct { 17 tr *TaskRunner 18 19 logger hclog.Logger 20} 21 22func newRemoteTaskHook(tr *TaskRunner, logger hclog.Logger) interfaces.TaskHook { 23 h := &remoteTaskHook{ 24 tr: tr, 25 } 26 h.logger = logger.Named(h.Name()) 27 return h 28} 29 30func (h *remoteTaskHook) Name() string { 31 return "remote_task" 32} 33 34// Prestart performs 2 remote task driver related tasks: 35// 1. If there is no local handle, see if there is a handle propagated from a 36// previous alloc to be restored. 37// 2. If the alloc is lost make sure the task signal is set to detach instead 38// of kill. 39func (h *remoteTaskHook) Prestart(ctx context.Context, req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { 40 if h.tr.getDriverHandle() != nil { 41 // Driver handle already exists so don't try to load remote 42 // task handle 43 return nil 44 } 45 46 h.tr.stateLock.Lock() 47 th := drivers.NewTaskHandleFromState(h.tr.state) 48 h.tr.stateLock.Unlock() 49 50 // Task handle will be nil if there was no previous allocation or if 51 // this is a destructive update 52 if th == nil { 53 resp.Done = true 54 return nil 55 } 56 57 // The task config is unique per invocation so recreate it here 58 th.Config = h.tr.buildTaskConfig() 59 60 if err := h.tr.driver.RecoverTask(th); err != nil { 61 // Soft error here to let a new instance get started instead of 62 // failing the task since retrying is unlikely to help. 63 h.logger.Error("error recovering task state", "error", err) 64 return nil 65 } 66 67 taskInfo, err := h.tr.driver.InspectTask(th.Config.ID) 68 if err != nil { 69 // Soft error here to let a new instance get started instead of 70 // failing the task since retrying is unlikely to help. 71 h.logger.Error("error inspecting recovered task state", "error", err) 72 return nil 73 } 74 75 h.tr.setDriverHandle(NewDriverHandle(h.tr.driver, th.Config.ID, h.tr.Task(), taskInfo.NetworkOverride)) 76 77 h.tr.stateLock.Lock() 78 h.tr.localState.TaskHandle = th 79 h.tr.localState.DriverNetwork = taskInfo.NetworkOverride 80 h.tr.stateLock.Unlock() 81 82 // Ensure the signal is set according to the allocation's state 83 h.setSignal(h.tr.Alloc()) 84 85 // Emit TaskStarted manually since the normal task runner logic will 86 // treat this task like a restored task and skip emitting started. 87 h.tr.UpdateState(structs.TaskStateRunning, structs.NewTaskEvent(structs.TaskStarted)) 88 89 return nil 90} 91 92// PreKilling tells the remote task driver to detach a remote task instead of 93// stopping it. 94func (h *remoteTaskHook) PreKilling(ctx context.Context, req *interfaces.TaskPreKillRequest, resp *interfaces.TaskPreKillResponse) error { 95 alloc := h.tr.Alloc() 96 h.setSignal(alloc) 97 return nil 98} 99 100// setSignal to detach if the allocation is lost or draining. Safe to call 101// multiple times as it only transitions to using detach -- never back to kill. 102func (h *remoteTaskHook) setSignal(alloc *structs.Allocation) { 103 driverHandle := h.tr.getDriverHandle() 104 if driverHandle == nil { 105 // Nothing to do exit early 106 return 107 } 108 109 switch { 110 case alloc.ClientStatus == structs.AllocClientStatusLost: 111 // Continue on; lost allocs should just detach 112 h.logger.Debug("detaching from remote task since alloc was lost") 113 case alloc.DesiredTransition.ShouldMigrate(): 114 // Continue on; migrating allocs should just detach 115 h.logger.Debug("detaching from remote task since alloc was drained") 116 default: 117 // Nothing to do exit early 118 return 119 } 120 121 // Set DetachSignal to indicate to the remote task driver that it 122 // should detach this remote task and ignore it. 123 driverHandle.SetKillSignal(drivers.DetachSignal) 124} 125