1#!/usr/bin/perl
2###############################################################################
3#
4#  sjstat - List attributes of jobs under SLURM control
5#
6###############################################################################
7#  Copyright (C) 2007 The Regents of the University of California.
8#  Copyright (C) 2008-2009 Lawrence Livermore National Security.
9#  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
10#  Written by Phil Eckert <eckert21@llnl.gov>.
11#  CODE-OCEC-09-009. All rights reserved.
12#
13#  This file is part of SLURM, a resource management program.
14#  For details, see <https://slurm.schedmd.com/>.
15#  Please also read the included file: DISCLAIMER.
16#
17#  SLURM is free software; you can redistribute it and/or modify it under
18#  the terms of the GNU General Public License as published by the Free
19#  Software Foundation; either version 2 of the License, or (at your option)
20#  any later version.
21#
22#  In addition, as a special exception, the copyright holders give permission
23#  to link the code of portions of this program with the OpenSSL library under
24#  certain conditions as described in each individual source file, and
25#  distribute linked combinations including the two. You must obey the GNU
26#  General Public License in all respects for all of the code used other than
27#  OpenSSL. If you modify file(s) with this exception, you may extend this
28#  exception to your version of the file(s), but you are not obligated to do
29#  so. If you do not wish to do so, delete this exception statement from your
30#  version.  If you delete this exception statement from all source files in
31#  the program, then also delete it here.
32#
33#  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
34#  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
35#  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
36#  details.
37#
38#  You should have received a copy of the GNU General Public License along
39#  with SLURM; if not, write to the Free Software Foundation, Inc.,
40#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
41#
42#  Based off code with permission copyright 2006, 2007 Cluster Resources, Inc.
43###############################################################################
44
45#
46# Man page stuff.
47#
BEGIN {
    # Man page support: when --roff appears on the command line (before any
    # "--" terminator), dump the embedded POD in *roff format on STDOUT and
    # exit without running the rest of the program.
    foreach my $arg (@ARGV) {
        last if ($arg eq "--");
        next unless ($arg eq "--roff");

        # Load Pod::Man at run time, and only when it is really needed.
        # A "use" statement is executed at compile time no matter which
        # conditional surrounds it, so it would load the module on every
        # invocation of the command.
        require Pod::Man;
        my $parser = Pod::Man->new(section => 1);
        $parser->parse_from_file($0, \*STDOUT);
        exit 0;
    }
}
61
62use strict;
63use Getopt::Long 2.24 qw(:config no_ignore_case);
64use autouse 'Pod::Usage' => qw(pod2usage);
65
#
#	Global Variables.
#
#	$help, $man, $pool, $running and $verbose are the command line
#	switches filled in by get_options(); %MaxNodes and %MaxTime hold the
#	per partition limits collected by do_scontrol_part().
#
	my ($help, $man, $pool, $running, $verbose);
	my (%MaxNodes, %MaxTime);

#
#	Get user options first, so that the usage message and the man
#	page remain available even when SLURM cannot be reached.
#
	get_options();

#
#	Check SLURM status.
#
	isslurmup();

#
#	Get partition information from scontrol, used
#	currently in conjunction with the sinfo data.
#
	do_scontrol_part();

#
#	Get and display the sinfo data.
#
	do_sinfo();

#
#	If the -c option was entered, stop here.
#
	exit if ($pool);

#
#	Get and display the squeue data.
#
	do_squeue();

	exit;
104
105
106#
107# Get the SLURM partitions information.
108#
sub do_sinfo
{

#
#	Print the "Scheduling pool data" table: one row per line reported
#	by sinfo.  With -v the table also shows the node and time limits
#	found in the file-level %MaxNodes and %MaxTime hashes.
#
	my $options = "\"%9P %7m %.4c %.22F %f\"";

#
#	Collect one record per sinfo output line.
#
	my @pools;
	foreach my $row (`sinfo -e -o $options`) {
		next if ($row =~ /^PARTITION/);
		chomp $row;
		my ($name, $memory, $cpus, $counts, $traits) = split(' ', $row);

#
#		The fourth column packs four node counts separated by
#		slashes; the third of them is subtracted from the total
#		to obtain the number of usable nodes.
#
		my ($active, $idle, $out, $total) = split(/\//, $counts);

#
#		Hide the "(null)" that sinfo prints for missing features.
#
		$traits .= " ";
		$traits =~ s/\(null\)//g;

		push(@pools, {
			name   => $name,
			memory => $memory,
			cpus   => $cpus,
			total  => $total,
			usable => $total - $out,
			idle   => $idle,
			traits => $traits,
		});
	}

#
#	Table header.
#
	printf("\nScheduling pool data:\n");
	if ($verbose) {
		printf("----------------------------------------------------------------------------------\n");
		printf("                           Total  Usable   Free   Node   Time      Other          \n");
		printf("Pool         Memory  Cpus  Nodes   Nodes  Nodes  Limit  Limit      traits         \n");
		printf("----------------------------------------------------------------------------------\n");
	} else {
		printf("-------------------------------------------------------------\n");
		printf("Pool        Memory  Cpus  Total Usable   Free  Other Traits  \n");
		printf("-------------------------------------------------------------\n");
	}

#
#	Table body.
#
	foreach my $entry (@pools) {
		my @common = @{$entry}{qw(name memory cpus total usable idle)};
		if ($verbose) {
#
#			The limit hashes are keyed by the bare partition
#			name, without the "*" sinfo may append to it.
#
			(my $key = $entry->{name}) =~ s/\*//;
			printf("%-9s  %7dMb %5s %6s %7s %6s %6s %10s  %-s\n",
				@common, $MaxNodes{$key}, $MaxTime{$key},
				$entry->{traits});
		} else {
			printf("%-9s %7dMb %5s %6s %6s %6s  %-s\n",
				@common, $entry->{traits});
		}
	}
	printf("\n");

	return;
}
176
177
178#
179# Get the SLURM queues.
180#
sub do_squeue
{

#
#	Print the "Running job data" table built from the squeue output.
#	With -r pending jobs are left out, with -v the time limit and the
#	start time of each job are shown as well.
#
	my (@s_job, @s_user, @s_nodes, @s_status, @s_begin, @s_limit,
	    @s_start, @s_pool, @s_used, @s_master);
#
#	Base options on whether this partition is node or process scheduled.
#	The test looks for a consumable resources select plugin in the
#	configuration; both cons_res and its successor cons_tres are
#	accepted.  system() returns non-zero when grep finds no match.
#
	my ($type, $options);
	my $rval = system("scontrol show config | grep -E 'cons_t?res' >> /dev/null");
	if ($rval) {
		$type = "Nodes";
		$options =  "\"%8i  %8u %.6D %2t %S %.12l  %.9P %.11M  %1000R\"";
	} else {
		$type = "Procs";
		$options =  "\"%8i  %8u %.6C %2t %S %.12l  %.9P %.11M  %1000R\"";
	}

#
#	Get the job information.
#
	my $ct = 0;
	my @sout = `squeue -o $options`;
	foreach my $tmp (@sout) {
		next if ($tmp =~ /^JOBID/);
		next if ($running && $tmp =~ / PD /);
		chomp $tmp;
		my @line = split(' ', $tmp);
		$s_job[$ct]    = $line[0];
		$s_user[$ct]   = $line[1];
		$s_nodes[$ct]  = $line[2];
		$s_status[$ct] = $line[3];
#
#		Drop the first five characters of the start time
#		(presumably the "YYYY-" of the time stamp); pending
#		jobs are shown as "N/A" instead.
#
		$line[4] =~ s/^.....//;
		$line[4] = "N/A" if ($line[3] =~ /PD/);
		$s_begin[$ct]  = $line[4];

		$s_limit[$ct] = ($line[5] eq "UNLIMITED")
			? $line[5]
			: convert_time($line[5]);

		$s_pool[$ct] = $line[6];
		$s_used[$ct] = $line[7];
#
#		Only keep the master node from the nodes list.
#
		$line[8] =~ s/\[([0-9.]*).*/$1/;
		$s_master[$ct] = $line[8];
		$ct++;
	}


	printf("Running job data:\n");

	if ($verbose) {
		printf("---------------------------------------------------------------------------------------------------\n");
		printf("                                                 Time        Time            Time                  \n");
		printf("JobID    User      $type Pool      Status        Used       Limit         Started  Master/Other    \n");
		printf("---------------------------------------------------------------------------------------------------\n");
	} else {
		printf("----------------------------------------------------------------------\n");
		printf("JobID    User      $type Pool      Status        Used  Master/Other   \n");
		printf("----------------------------------------------------------------------\n");
	}

	for (my $i = 0; $i < $ct; $i++) {
		if ($verbose) {
			printf("%-8s %-8s %6s %-9s %-7s %10s %11s  %14s  %.12s\n",
				$s_job[$i], $s_user[$i], $s_nodes[$i],
				$s_pool[$i], $s_status[$i],
				$s_used[$i], $s_limit[$i], $s_begin[$i],
				$s_master[$i]);
		} else {
			printf("%-8s %-8s %6s %-9s %-7s %10s  %.12s\n",
				$s_job[$i], $s_user[$i], $s_nodes[$i],
				$s_pool[$i],  $s_status[$i],
				$s_used[$i], $s_master[$i]);
		}
	}
	printf("\n");

	return;
}
267
268#
269# Get the SLURM partitions.
270#
sub do_scontrol_part
{

#
#	Record the MaxTime and MaxNodes value of every partition reported
#	by "scontrol show part" in the file-level %MaxTime and %MaxNodes
#	hashes, keyed by partition name.  "UNLIMITED" is stored as "UNLIM".
#
	my $part;
	foreach my $tmp (`scontrol show part`) {
		chomp $tmp;

#
#		A PartitionName= field starts a new partition record, the
#		limits found on the following lines belong to it.
#
		$part = $1 if ($tmp =~ m/PartitionName=(\S+)/);

#
#		Ignore anything printed before the first partition record.
#
		next unless (defined $part);

#
#		Take the value whether or not another field follows it on
#		the line; insisting on trailing white space would lose a
#		value that is printed last on its line.
#
		if ($tmp =~ m/\bMaxTime=(\S+)/) {
			my $limit = $1;
			$limit =~ s/UNLIMITED/UNLIM/;
			$MaxTime{$part} = $limit;
		}
		if ($tmp =~ m/\bMaxNodes=(\S+)/) {
			my $limit = $1;
			$limit =~ s/UNLIMITED/UNLIM/;
			$MaxNodes{$part} = $limit;
		}
	}

	return;
}
293
294
295#
296# Show the man page.
297#
sub show_man
{

#
#	Display the embedded POD as a man page through pod2usage().
#	Root is refused: the real uid is switched to an unprivileged
#	account, a message is printed and the program exits with status 1.
#
	if ($< == 0) {    # Cannot invoke perldoc as root
		my $id;
		foreach my $account ("nobody", "nouser") {
			$id = eval { getpwnam($account) };
			last if (defined $id);
		}
		$id = -2 unless (defined $id);
		$<  = $id;
		printf("\n You can not do this as root!\n\n");
		exit 1;
	}

#
#	Disengage setuid and clean up the environment.
#
	$> = $<;
	$ENV{PATH} = "/bin:/usr/bin";
	foreach my $name ('IFS', 'CDPATH', 'ENV', 'BASH_ENV') {
		delete $ENV{$name};
	}

#
#	Untaint $0, refusing anything outside the expected character set.
#
	my ($script) = ($0 =~ /^([-\/\w\.]+)$/)
		or die "Illegal characters were found in \$0 ($0)\n";
	$0 = $script;

	pod2usage(-exitstatus => 0, -verbose => 2);

	return;
}
318
319
320#
321# Convert the time to a better format.
322#
sub convert_time
{
#
#	Takes a time string and returns it with a leading day count folded
#	into the hours ("1-02:03:04" becomes "26:03:04").  A string that
#	does not split into exactly four fields is returned unchanged
#	apart from being right justified to eight characters.
#
	my ($limit) = @_;

	my @parts = split(/-|:/, $limit);
	return(sprintf("%8s", $limit)) unless (@parts == 4);

	my ($days, $hours, $minutes, $seconds) = @parts;
	my $total_hours = ($days * 24) + $hours;

	return(join(':', $total_hours, $minutes, $seconds));
}
337
338
339#
340# Get options.
341#
sub get_options
{
#
#	Parse the command line into the file-level option variables.
#	An unknown option prints the usage text and exits with status 1;
#	-man and -h show their text and exit as well.
#
	my @spec = (
		'help|h|?' => \$help,
		'man'      => \$man,
		'v'        => \$verbose,
		'r'        => \$running,
		'c'        => \$pool,
	);
	usage(1) unless (GetOptions(@spec));

	if ($man) {
		show_man();
	}
	if ($help) {
		usage(0);
	}

	return;
}
357
358
359#
360# Usage.
361#
sub usage
{
#
#	Print usage instructions and exit.
#
#	The only argument is the exit status handed to exit().  The
#	synopsis line goes to STDERR, the option summary to STDOUT.
#
	my $eval = shift(@_);

	print STDERR "\nUsage: sjstat [-h] [-c] [-man] [-r] [-v]\n";

	printf("\
   -h	shows usage.
   -c	shows computing resources info only.
   -man	shows man page.
   -r	show only running jobs.
   -v	is for the verbose mode.\n

   Output is very similar to that of squeue.
	\n\n");

	exit($eval);
}
383
384
385#
386# Determine if SLURM is available.
387#
sub isslurmup
{
#
#	Run "scontrol show part" as a probe.  Its output is discarded,
#	only the exit status matters: on failure a message is printed
#	and the program exits with status 1.
#
	my $probe_output = `scontrol show part 2>&1`;
	return if ($? == 0);

	printf("\n SLURM is not communicating.\n\n");
	exit(1);
}
398
399
400__END__
401
402=head1 NAME
403
B<sjstat> - List attributes of jobs under SLURM control
405
406=head1 SYNOPSIS
407
B<sjstat> [B<-h>] [B<-c>] [B<-man>] [B<-r>] [B<-v>]
409
410=head1 DESCRIPTION
411
The B<sjstat> command is used to display statistics of jobs under control of SLURM.
The output is designed to give information on the resource usage and availability,
as well as information about jobs that are currently active on the machine. This output
is built using the SLURM utilities sinfo, squeue and scontrol; the man pages for these
utilities will provide more information and greater depth of understanding.
417
418=head1 OPTIONS
419
420=over 4
421
422=item B<-h>
423
424Display a brief help message
425
426=item B<-c>
427
428Display the computing resource information only.
429
430=item B<-man>
431
432Show the man page.
433
434=item B<-r>
435
436Display only the running jobs.
437
438=item B<-v>
439
440Display more verbose information.
441
442=back
443
444=head1 EXAMPLE
445
446The following is a basic request for status.
447
448    > sjstat
449
450     Scheduling pool data:
451     ------------------------------------------------------------
452     Pool         Memory  Cpus  Total Usable   Free  Other Traits
453     ------------------------------------------------------------
454     pdebug      15000Mb     8     32     32     24  (null)
455     pbatch*     15000Mb     8   1072   1070    174  (null)
456
457
458     Running job data:
459     -------------------------------------------------------------------
460     JobID    User      Nodes Pool       Status        Used Master/Other
461     -------------------------------------------------------------------
462     395      mary       1000 pbatch     PD            0:00 (JobHeld)
463     396      mary       1000 pbatch     PD            0:00 (JobHeld)
464     375      sam        1000 pbatch     CG            0:00 (JobHeld)
465     388      fred         32 pbatch     R            25:27 atlas89
466     361      harry       512 pbatch     R          1:01:12 atlas618
467     1077742  sally         8 pdebug     R            20:16 atlas18
468
469
470     The Scheduling data contains information pertaining to the:
471
472 	Pool  	  a set of nodes
473 	Memory	  the amount of memory on each node
474 	Cpus	  the number of cpus on each node
475 	Total	  the total number of nodes in the pool
 	Usable	  total usable nodes in the pool
477 	Free	  total nodes that are currently free
478
479     The Running job data contains information pertaining to the:
480
481 	JobID		the SLURM job id
482 	User		owner of the job
483 	Nodes		nodes required, or in use by the job
			(Note: On cpu scheduled machines, this field
			will be labeled "Procs" and show the number of
			processors the job is using.)
487 	Pool 		the Pool  required or in use by the job
488 	Status		current status of the job
 	Used 		Wallclock time used by the job
 	Master/Other 	Either the Master (head) node used by the job, or may
			indicate further status of a pending, or completing job.
492
493     The common status values are:
494
495 	R	The job is running
496	PD	The job is Pending
497	CG	The job is Completing
498
     These are states reported by SLURM and more elaborate documentation
     can be found in the squeue/sinfo man pages.
501
502
503 An example of the -v option.
504
505     Scheduling pool data:
506     -----------------------------------------------------------------------------
507                                Total  Usable   Free   Node   Time  Other
508     Pool         Memory  Cpus  Nodes   Nodes  Nodes  Limit  Limit  Traits
509     -----------------------------------------------------------------------------
510     pdebug      15000Mb     8     32      32     24     16     30  (null)
511     pbatch*     15000Mb     8   1072    1070    174  UNLIM  UNLIM  (null)
512
513     Running job data:
514     ---------------------------------------------------------------------------------------------------
515                                                      Time        Time            Time
516     JobID    User      Nodes Pool      Status        Used       Limit         Started  Master/Other
517     ---------------------------------------------------------------------------------------------------
518     38562    tom           4 pbatch    PD            0:00     1:00:00  01-14T18:11:22  (JobHeld)
519
520     The added fields to the "Scheduling pool data" are:
521
522 	Node Limit	SLURM imposed node limit.
523 	Time Limit	SLURM imposed time limit, value in minutes.
524
525     The added fields to the "Running job data" are:
526
527 	Limit		Time limit of job.
528 	Start		Start time of job.
529
530=head1 REPORTING BUGS
531
532Report bugs to <eckert2@llnl.gov>
533
534=cut
535