1package command
2
3import (
4	"fmt"
5	"sort"
6	"strings"
7	"time"
8
9	"github.com/hashicorp/nomad/api"
10	"github.com/hashicorp/nomad/api/contexts"
11	"github.com/hashicorp/nomad/nomad/structs"
12	"github.com/posener/complete"
13)
14
const (
	// maxFailedTGs caps how many task groups have their placement failure
	// reasons printed by outputFailedPlacements. When an evaluation has more
	// failed task groups than this, the output is truncated and the user is
	// pointed at "nomad eval-status" for the remainder.
	maxFailedTGs = 5
)
20
// JobStatusCommand implements the "status" command, which either lists jobs
// or prints detailed status for a single job.
//
// Fields (all populated by Run):
//   - length: number of characters IDs are truncated to when printed; set to
//     shortId, or fullId when -verbose is given.
//   - evals: value of the -evals flag; makes the evaluations table print.
//   - allAllocs: value of the -all-allocs flag; forwarded to the job
//     allocations query.
//   - verbose: value of the -verbose flag; selects full-length IDs and the
//     more detailed table layouts.
type JobStatusCommand struct {
	Meta
	length    int
	evals     bool
	allAllocs bool
	verbose   bool
}
28
// Help returns the usage text for the status command. The general options
// usage is spliced into the text and surrounding whitespace is trimmed.
func (c *JobStatusCommand) Help() string {
	helpText := `
Usage: nomad status [options] <job>

  Display status information about a job. If no job ID is given, a list of all
  known jobs will be displayed.

General Options:

  ` + generalOptionsUsage() + `

Status Options:

  -short
    Display short output. Used only when a single job is being
    queried, and drops verbose information about allocations.

  -evals
    Display the evaluations associated with the job.

  -all-allocs
    Display all allocations matching the job ID, including those from an older
    instance of the job.

  -verbose
    Display full information.
`
	return strings.TrimSpace(helpText)
}
58
// Synopsis returns a one-line description of the status command.
func (c *JobStatusCommand) Synopsis() string {
	return "Display status information about a job"
}
62
63func (c *JobStatusCommand) AutocompleteFlags() complete.Flags {
64	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
65		complete.Flags{
66			"-all-allocs": complete.PredictNothing,
67			"-evals":      complete.PredictNothing,
68			"-short":      complete.PredictNothing,
69			"-verbose":    complete.PredictNothing,
70		})
71}
72
73func (c *JobStatusCommand) AutocompleteArgs() complete.Predictor {
74	return complete.PredictFunc(func(a complete.Args) []string {
75		client, err := c.Meta.Client()
76		if err != nil {
77			return nil
78		}
79
80		resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Jobs, nil)
81		if err != nil {
82			return []string{}
83		}
84		return resp.Matches[contexts.Jobs]
85	})
86}
87
// Name returns the command name, which Run also uses to name its flag set.
func (c *JobStatusCommand) Name() string { return "status" }
89
90func (c *JobStatusCommand) Run(args []string) int {
91	var short bool
92
93	flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
94	flags.Usage = func() { c.Ui.Output(c.Help()) }
95	flags.BoolVar(&short, "short", false, "")
96	flags.BoolVar(&c.evals, "evals", false, "")
97	flags.BoolVar(&c.allAllocs, "all-allocs", false, "")
98	flags.BoolVar(&c.verbose, "verbose", false, "")
99
100	if err := flags.Parse(args); err != nil {
101		return 1
102	}
103
104	// Check that we either got no jobs or exactly one.
105	args = flags.Args()
106	if len(args) > 1 {
107		c.Ui.Error("This command takes either no arguments or one: <job>")
108		c.Ui.Error(commandErrorText(c))
109		return 1
110	}
111
112	// Truncate the id unless full length is requested
113	c.length = shortId
114	if c.verbose {
115		c.length = fullId
116	}
117
118	// Get the HTTP client
119	client, err := c.Meta.Client()
120	if err != nil {
121		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
122		return 1
123	}
124
125	// Invoke list mode if no job ID.
126	if len(args) == 0 {
127		jobs, _, err := client.Jobs().List(nil)
128		if err != nil {
129			c.Ui.Error(fmt.Sprintf("Error querying jobs: %s", err))
130			return 1
131		}
132
133		if len(jobs) == 0 {
134			// No output if we have no jobs
135			c.Ui.Output("No running jobs")
136		} else {
137			c.Ui.Output(createStatusListOutput(jobs))
138		}
139		return 0
140	}
141
142	// Try querying the job
143	jobID := args[0]
144
145	jobs, _, err := client.Jobs().PrefixList(jobID)
146	if err != nil {
147		c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
148		return 1
149	}
150	if len(jobs) == 0 {
151		c.Ui.Error(fmt.Sprintf("No job(s) with prefix or id %q found", jobID))
152		return 1
153	}
154	if len(jobs) > 1 && strings.TrimSpace(jobID) != jobs[0].ID {
155		c.Ui.Error(fmt.Sprintf("Prefix matched multiple jobs\n\n%s", createStatusListOutput(jobs)))
156		return 1
157	}
158	// Prefix lookup matched a single job
159	job, _, err := client.Jobs().Info(jobs[0].ID, nil)
160	if err != nil {
161		c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
162		return 1
163	}
164
165	periodic := job.IsPeriodic()
166	parameterized := job.IsParameterized()
167
168	// Format the job info
169	basic := []string{
170		fmt.Sprintf("ID|%s", *job.ID),
171		fmt.Sprintf("Name|%s", *job.Name),
172		fmt.Sprintf("Submit Date|%s", formatTime(time.Unix(0, *job.SubmitTime))),
173		fmt.Sprintf("Type|%s", *job.Type),
174		fmt.Sprintf("Priority|%d", *job.Priority),
175		fmt.Sprintf("Datacenters|%s", strings.Join(job.Datacenters, ",")),
176		fmt.Sprintf("Status|%s", getStatusString(*job.Status, job.Stop)),
177		fmt.Sprintf("Periodic|%v", periodic),
178		fmt.Sprintf("Parameterized|%v", parameterized),
179	}
180
181	if periodic && !parameterized {
182		if *job.Stop {
183			basic = append(basic, fmt.Sprintf("Next Periodic Launch|none (job stopped)"))
184		} else {
185			location, err := job.Periodic.GetLocation()
186			if err == nil {
187				now := time.Now().In(location)
188				next, err := job.Periodic.Next(now)
189				if err == nil {
190					basic = append(basic, fmt.Sprintf("Next Periodic Launch|%s",
191						fmt.Sprintf("%s (%s from now)",
192							formatTime(next), formatTimeDifference(now, next, time.Second))))
193				}
194			}
195		}
196	}
197
198	c.Ui.Output(formatKV(basic))
199
200	// Exit early
201	if short {
202		return 0
203	}
204
205	// Print periodic job information
206	if periodic && !parameterized {
207		if err := c.outputPeriodicInfo(client, job); err != nil {
208			c.Ui.Error(err.Error())
209			return 1
210		}
211	} else if parameterized {
212		if err := c.outputParameterizedInfo(client, job); err != nil {
213			c.Ui.Error(err.Error())
214			return 1
215		}
216	} else {
217		if err := c.outputJobInfo(client, job); err != nil {
218			c.Ui.Error(err.Error())
219			return 1
220		}
221	}
222
223	return 0
224}
225
226// outputPeriodicInfo prints information about the passed periodic job. If a
227// request fails, an error is returned.
228func (c *JobStatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) error {
229	// Output the summary
230	if err := c.outputJobSummary(client, job); err != nil {
231		return err
232	}
233
234	// Generate the prefix that matches launched jobs from the periodic job.
235	prefix := fmt.Sprintf("%s%s", *job.ID, structs.PeriodicLaunchSuffix)
236	children, _, err := client.Jobs().PrefixList(prefix)
237	if err != nil {
238		return fmt.Errorf("Error querying job: %s", err)
239	}
240
241	if len(children) == 0 {
242		c.Ui.Output("\nNo instances of periodic job found")
243		return nil
244	}
245
246	out := make([]string, 1)
247	out[0] = "ID|Status"
248	for _, child := range children {
249		// Ensure that we are only showing jobs whose parent is the requested
250		// job.
251		if child.ParentID != *job.ID {
252			continue
253		}
254
255		out = append(out, fmt.Sprintf("%s|%s",
256			child.ID,
257			child.Status))
258	}
259
260	c.Ui.Output(c.Colorize().Color("\n[bold]Previously Launched Jobs[reset]"))
261	c.Ui.Output(formatList(out))
262	return nil
263}
264
265// outputParameterizedInfo prints information about a parameterized job. If a
266// request fails, an error is returned.
267func (c *JobStatusCommand) outputParameterizedInfo(client *api.Client, job *api.Job) error {
268	// Output parameterized job details
269	c.Ui.Output(c.Colorize().Color("\n[bold]Parameterized Job[reset]"))
270	parameterizedJob := make([]string, 3)
271	parameterizedJob[0] = fmt.Sprintf("Payload|%s", job.ParameterizedJob.Payload)
272	parameterizedJob[1] = fmt.Sprintf("Required Metadata|%v", strings.Join(job.ParameterizedJob.MetaRequired, ", "))
273	parameterizedJob[2] = fmt.Sprintf("Optional Metadata|%v", strings.Join(job.ParameterizedJob.MetaOptional, ", "))
274	c.Ui.Output(formatKV(parameterizedJob))
275
276	// Output the summary
277	if err := c.outputJobSummary(client, job); err != nil {
278		return err
279	}
280
281	// Generate the prefix that matches launched jobs from the parameterized job.
282	prefix := fmt.Sprintf("%s%s", *job.ID, structs.DispatchLaunchSuffix)
283	children, _, err := client.Jobs().PrefixList(prefix)
284	if err != nil {
285		return fmt.Errorf("Error querying job: %s", err)
286	}
287
288	if len(children) == 0 {
289		c.Ui.Output("\nNo dispatched instances of parameterized job found")
290		return nil
291	}
292
293	out := make([]string, 1)
294	out[0] = "ID|Status"
295	for _, child := range children {
296		// Ensure that we are only showing jobs whose parent is the requested
297		// job.
298		if child.ParentID != *job.ID {
299			continue
300		}
301
302		out = append(out, fmt.Sprintf("%s|%s",
303			child.ID,
304			child.Status))
305	}
306
307	c.Ui.Output(c.Colorize().Color("\n[bold]Dispatched Jobs[reset]"))
308	c.Ui.Output(formatList(out))
309	return nil
310}
311
312// outputJobInfo prints information about the passed non-periodic job. If a
313// request fails, an error is returned.
314func (c *JobStatusCommand) outputJobInfo(client *api.Client, job *api.Job) error {
315
316	// Query the allocations
317	jobAllocs, _, err := client.Jobs().Allocations(*job.ID, c.allAllocs, nil)
318	if err != nil {
319		return fmt.Errorf("Error querying job allocations: %s", err)
320	}
321
322	// Query the evaluations
323	jobEvals, _, err := client.Jobs().Evaluations(*job.ID, nil)
324	if err != nil {
325		return fmt.Errorf("Error querying job evaluations: %s", err)
326	}
327
328	latestDeployment, _, err := client.Jobs().LatestDeployment(*job.ID, nil)
329	if err != nil {
330		return fmt.Errorf("Error querying latest job deployment: %s", err)
331	}
332
333	// Output the summary
334	if err := c.outputJobSummary(client, job); err != nil {
335		return err
336	}
337
338	// Determine latest evaluation with failures whose follow up hasn't
339	// completed, this is done while formatting
340	var latestFailedPlacement *api.Evaluation
341	blockedEval := false
342
343	// Format the evals
344	evals := make([]string, len(jobEvals)+1)
345	evals[0] = "ID|Priority|Triggered By|Status|Placement Failures"
346	for i, eval := range jobEvals {
347		failures, _ := evalFailureStatus(eval)
348		evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s",
349			limit(eval.ID, c.length),
350			eval.Priority,
351			eval.TriggeredBy,
352			eval.Status,
353			failures,
354		)
355
356		if eval.Status == "blocked" {
357			blockedEval = true
358		}
359
360		if len(eval.FailedTGAllocs) == 0 {
361			// Skip evals without failures
362			continue
363		}
364
365		if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex {
366			latestFailedPlacement = eval
367		}
368	}
369
370	if c.verbose || c.evals {
371		c.Ui.Output(c.Colorize().Color("\n[bold]Evaluations[reset]"))
372		c.Ui.Output(formatList(evals))
373	}
374
375	if blockedEval && latestFailedPlacement != nil {
376		c.outputFailedPlacements(latestFailedPlacement)
377	}
378
379	c.outputReschedulingEvals(client, job, jobAllocs, c.length)
380
381	if latestDeployment != nil {
382		c.Ui.Output(c.Colorize().Color("\n[bold]Latest Deployment[reset]"))
383		c.Ui.Output(c.Colorize().Color(c.formatDeployment(latestDeployment)))
384	}
385
386	// Format the allocs
387	c.Ui.Output(c.Colorize().Color("\n[bold]Allocations[reset]"))
388	c.Ui.Output(formatAllocListStubs(jobAllocs, c.verbose, c.length))
389	return nil
390}
391
392func (c *JobStatusCommand) formatDeployment(d *api.Deployment) string {
393	// Format the high-level elements
394	high := []string{
395		fmt.Sprintf("ID|%s", limit(d.ID, c.length)),
396		fmt.Sprintf("Status|%s", d.Status),
397		fmt.Sprintf("Description|%s", d.StatusDescription),
398	}
399
400	base := formatKV(high)
401	if len(d.TaskGroups) == 0 {
402		return base
403	}
404	base += "\n\n[bold]Deployed[reset]\n"
405	base += formatDeploymentGroups(d, c.length)
406	return base
407}
408
409func formatAllocListStubs(stubs []*api.AllocationListStub, verbose bool, uuidLength int) string {
410	if len(stubs) == 0 {
411		return "No allocations placed"
412	}
413
414	allocs := make([]string, len(stubs)+1)
415	if verbose {
416		allocs[0] = "ID|Eval ID|Node ID|Node Name|Task Group|Version|Desired|Status|Created|Modified"
417		for i, alloc := range stubs {
418			allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%d|%s|%s|%s|%s",
419				limit(alloc.ID, uuidLength),
420				limit(alloc.EvalID, uuidLength),
421				limit(alloc.NodeID, uuidLength),
422				alloc.NodeName,
423				alloc.TaskGroup,
424				alloc.JobVersion,
425				alloc.DesiredStatus,
426				alloc.ClientStatus,
427				formatUnixNanoTime(alloc.CreateTime),
428				formatUnixNanoTime(alloc.ModifyTime))
429		}
430	} else {
431		allocs[0] = "ID|Node ID|Task Group|Version|Desired|Status|Created|Modified"
432		for i, alloc := range stubs {
433			now := time.Now()
434			createTimePretty := prettyTimeDiff(time.Unix(0, alloc.CreateTime), now)
435			modTimePretty := prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now)
436			allocs[i+1] = fmt.Sprintf("%s|%s|%s|%d|%s|%s|%s|%s",
437				limit(alloc.ID, uuidLength),
438				limit(alloc.NodeID, uuidLength),
439				alloc.TaskGroup,
440				alloc.JobVersion,
441				alloc.DesiredStatus,
442				alloc.ClientStatus,
443				createTimePretty,
444				modTimePretty)
445		}
446	}
447
448	return formatList(allocs)
449}
450
451func formatAllocList(allocations []*api.Allocation, verbose bool, uuidLength int) string {
452	if len(allocations) == 0 {
453		return "No allocations placed"
454	}
455
456	allocs := make([]string, len(allocations)+1)
457	if verbose {
458		allocs[0] = "ID|Eval ID|Node ID|Task Group|Version|Desired|Status|Created|Modified"
459		for i, alloc := range allocations {
460			allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%d|%s|%s|%s|%s",
461				limit(alloc.ID, uuidLength),
462				limit(alloc.EvalID, uuidLength),
463				limit(alloc.NodeID, uuidLength),
464				alloc.TaskGroup,
465				*alloc.Job.Version,
466				alloc.DesiredStatus,
467				alloc.ClientStatus,
468				formatUnixNanoTime(alloc.CreateTime),
469				formatUnixNanoTime(alloc.ModifyTime))
470		}
471	} else {
472		allocs[0] = "ID|Node ID|Task Group|Version|Desired|Status|Created|Modified"
473		for i, alloc := range allocations {
474			now := time.Now()
475			createTimePretty := prettyTimeDiff(time.Unix(0, alloc.CreateTime), now)
476			modTimePretty := prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now)
477			allocs[i+1] = fmt.Sprintf("%s|%s|%s|%d|%s|%s|%s|%s",
478				limit(alloc.ID, uuidLength),
479				limit(alloc.NodeID, uuidLength),
480				alloc.TaskGroup,
481				*alloc.Job.Version,
482				alloc.DesiredStatus,
483				alloc.ClientStatus,
484				createTimePretty,
485				modTimePretty)
486		}
487	}
488
489	return formatList(allocs)
490}
491
492// outputJobSummary displays the given jobs summary and children job summary
493// where appropriate
494func (c *JobStatusCommand) outputJobSummary(client *api.Client, job *api.Job) error {
495	// Query the summary
496	summary, _, err := client.Jobs().Summary(*job.ID, nil)
497	if err != nil {
498		return fmt.Errorf("Error querying job summary: %s", err)
499	}
500
501	if summary == nil {
502		return nil
503	}
504
505	periodic := job.IsPeriodic()
506	parameterizedJob := job.IsParameterized()
507
508	// Print the summary
509	if !periodic && !parameterizedJob {
510		c.Ui.Output(c.Colorize().Color("\n[bold]Summary[reset]"))
511		summaries := make([]string, len(summary.Summary)+1)
512		summaries[0] = "Task Group|Queued|Starting|Running|Failed|Complete|Lost"
513		taskGroups := make([]string, 0, len(summary.Summary))
514		for taskGroup := range summary.Summary {
515			taskGroups = append(taskGroups, taskGroup)
516		}
517		sort.Strings(taskGroups)
518		for idx, taskGroup := range taskGroups {
519			tgs := summary.Summary[taskGroup]
520			summaries[idx+1] = fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d",
521				taskGroup, tgs.Queued, tgs.Starting,
522				tgs.Running, tgs.Failed,
523				tgs.Complete, tgs.Lost,
524			)
525		}
526		c.Ui.Output(formatList(summaries))
527	}
528
529	// Always display the summary if we are periodic or parameterized, but
530	// only display if the summary is non-zero on normal jobs
531	if summary.Children != nil && (parameterizedJob || periodic || summary.Children.Sum() > 0) {
532		if parameterizedJob {
533			c.Ui.Output(c.Colorize().Color("\n[bold]Parameterized Job Summary[reset]"))
534		} else {
535			c.Ui.Output(c.Colorize().Color("\n[bold]Children Job Summary[reset]"))
536		}
537		summaries := make([]string, 2)
538		summaries[0] = "Pending|Running|Dead"
539		summaries[1] = fmt.Sprintf("%d|%d|%d",
540			summary.Children.Pending, summary.Children.Running, summary.Children.Dead)
541		c.Ui.Output(formatList(summaries))
542	}
543
544	return nil
545}
546
547// outputReschedulingEvals displays eval IDs and time for any
548// delayed evaluations by task group
549func (c *JobStatusCommand) outputReschedulingEvals(client *api.Client, job *api.Job, allocListStubs []*api.AllocationListStub, uuidLength int) error {
550	// Get the most recent alloc ID by task group
551
552	mostRecentAllocs := make(map[string]*api.AllocationListStub)
553	for _, alloc := range allocListStubs {
554		a, ok := mostRecentAllocs[alloc.TaskGroup]
555		if !ok || alloc.ModifyTime > a.ModifyTime {
556			mostRecentAllocs[alloc.TaskGroup] = alloc
557		}
558	}
559
560	followUpEvalIds := make(map[string]string)
561	for tg, alloc := range mostRecentAllocs {
562		if alloc.FollowupEvalID != "" {
563			followUpEvalIds[tg] = alloc.FollowupEvalID
564		}
565	}
566
567	if len(followUpEvalIds) == 0 {
568		return nil
569	}
570	// Print the reschedule info section
571	var delayedEvalInfos []string
572
573	taskGroups := make([]string, 0, len(followUpEvalIds))
574	for taskGroup := range followUpEvalIds {
575		taskGroups = append(taskGroups, taskGroup)
576	}
577	sort.Strings(taskGroups)
578	var evalDetails []string
579	first := true
580	for _, taskGroup := range taskGroups {
581		evalID := followUpEvalIds[taskGroup]
582		evaluation, _, err := client.Evaluations().Info(evalID, nil)
583		// Eval time is not critical output,
584		// so don't return it on errors, if its not set, or its already in the past
585		if err != nil || evaluation.WaitUntil.IsZero() || time.Now().After(evaluation.WaitUntil) {
586			continue
587		}
588		evalTime := prettyTimeDiff(evaluation.WaitUntil, time.Now())
589		if c.verbose {
590			if first {
591				delayedEvalInfos = append(delayedEvalInfos, "Task Group|Reschedule Policy|Eval ID|Eval Time")
592			}
593			rp := job.LookupTaskGroup(taskGroup).ReschedulePolicy
594			evalDetails = append(evalDetails, fmt.Sprintf("%s|%s|%s|%s", taskGroup, rp.String(), limit(evalID, uuidLength), evalTime))
595		} else {
596			if first {
597				delayedEvalInfos = append(delayedEvalInfos, "Task Group|Eval ID|Eval Time")
598			}
599			evalDetails = append(evalDetails, fmt.Sprintf("%s|%s|%s", taskGroup, limit(evalID, uuidLength), evalTime))
600		}
601		first = false
602	}
603	if len(evalDetails) == 0 {
604		return nil
605	}
606	// Only show this section if there is pending evals
607	delayedEvalInfos = append(delayedEvalInfos, evalDetails...)
608	c.Ui.Output(c.Colorize().Color("\n[bold]Future Rescheduling Attempts[reset]"))
609	c.Ui.Output(formatList(delayedEvalInfos))
610	return nil
611}
612
613func (c *JobStatusCommand) outputFailedPlacements(failedEval *api.Evaluation) {
614	if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 {
615		return
616	}
617
618	c.Ui.Output(c.Colorize().Color("\n[bold]Placement Failure[reset]"))
619
620	sorted := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs)
621	for i, tg := range sorted {
622		if i >= maxFailedTGs {
623			break
624		}
625
626		c.Ui.Output(fmt.Sprintf("Task Group %q:", tg))
627		metrics := failedEval.FailedTGAllocs[tg]
628		c.Ui.Output(formatAllocMetrics(metrics, false, "  "))
629		if i != len(sorted)-1 {
630			c.Ui.Output("")
631		}
632	}
633
634	if len(sorted) > maxFailedTGs {
635		trunc := fmt.Sprintf("\nPlacement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID)
636		c.Ui.Output(trunc)
637	}
638}
639
640// list general information about a list of jobs
641func createStatusListOutput(jobs []*api.JobListStub) string {
642	out := make([]string, len(jobs)+1)
643	out[0] = "ID|Type|Priority|Status|Submit Date"
644	for i, job := range jobs {
645		out[i+1] = fmt.Sprintf("%s|%s|%d|%s|%s",
646			job.ID,
647			getTypeString(job),
648			job.Priority,
649			getStatusString(job.Status, &job.Stop),
650			formatTime(time.Unix(0, job.SubmitTime)))
651	}
652	return formatList(out)
653}
654
655func getTypeString(job *api.JobListStub) string {
656	t := job.Type
657
658	if job.Periodic {
659		t += "/periodic"
660	}
661
662	if job.ParameterizedJob {
663		t += "/parameterized"
664	}
665
666	return t
667}
668
// getStatusString returns status unchanged, or with " (stopped)" appended
// when stop is non-nil and points to true.
func getStatusString(status string, stop *bool) string {
	stopped := stop != nil && *stop
	if !stopped {
		return status
	}
	return fmt.Sprintf("%s (stopped)", status)
}
675