1#!/usr/bin/env expect
2############################################################################
3# Purpose: Test of Slurm functionality
4#          Test some invalid combinations of --gpu options
5############################################################################
6# Copyright (C) 2018 SchedMD LLC
7# Written by Morris Jette
8#
9# This file is part of Slurm, a resource management program.
10# For details, see <https://slurm.schedmd.com/>.
11# Please also read the included file: DISCLAIMER.
12#
13# Slurm is free software; you can redistribute it and/or modify it under
14# the terms of the GNU General Public License as published by the Free
15# Software Foundation; either version 2 of the License, or (at your option)
16# any later version.
17#
18# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
19# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
21# details.
22#
23# You should have received a copy of the GNU General Public License along
24# with Slurm; if not, write to the Free Software Foundation, Inc.,
25# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
26############################################################################
27source ./globals
28
# Overall result flag: remains 0 unless a submission below is accepted
set exit_code   0

# Every case below is built around a GPU request, so the test is skipped
# when fewer than one GPU is reported for a single node of the default
# partition (see the skip message).
set gpu_cnt [get_highest_gres_count 1 "gpu"]
if {$gpu_cnt < 1} {
	skip "This test requires 1 or more GPUs on 1 node of the default partition"
}
log_debug "GPU count is $gpu_cnt"
39
#
# Request more GPUs per node than exist on a single node.
# The submission must be refused: an "error: " message is the normal
# outcome, while "Submitted batch job" means the request was wrongly
# accepted.
#
log_info "==== TEST 1 ===="
# The expr argument is braced so that it is substituted exactly once
spawn $sbatch --gpus-per-node=[expr {$gpu_cnt + 1}] -N1 --output=/dev/null -t1 --wrap $bin_hostname
expect {
	-re "Submitted batch job ($number)" {
		# Do not leave the wrongly accepted job behind
		cancel_job $expect_out(1,string)
		log_error "Batch request not rejected"
		set exit_code 1
		exp_continue
	}
	-re "error: " {
		log_debug "Error is expected, no worries"
		exp_continue
	}
	timeout {
		fail "sbatch not responding"
	}
	eof {
		wait
	}
}
63
#
# Specify 1 node and more GPUs than exist on a single node.
# The over-sized count is passed through --gres, which needs the resource
# name ("gpu:<count>"); a bare number would be refused as a malformed GRES
# specification instead of exercising the GPU count check.
#
log_info "==== TEST 2 ===="
# The expr argument is braced so that it is substituted exactly once
spawn $sbatch --gpus-per-node=$gpu_cnt --gres=gpu:[expr {$gpu_cnt + 1}] -N1 --output=/dev/null -t1 --wrap $bin_hostname
expect {
	-re "Submitted batch job ($number)" {
		# Do not leave the wrongly accepted job behind
		cancel_job $expect_out(1,string)
		log_error "Batch request not rejected"
		set exit_code 1
		exp_continue
	}
	-re "error: " {
		log_debug "Error is expected, no worries"
		exp_continue
	}
	timeout {
		fail "sbatch not responding"
	}
	eof {
		wait
	}
}
87
#
# Inconsistent CPU counts: one GPU per task at one CPU per GPU works out to
# a single CPU per task, yet --cpus-per-task asks for two
#
log_info "==== TEST 3 ===="
spawn $sbatch \
	--gpus-per-task=1 --cpus-per-gpu=1 --cpus-per-task=2 \
	-N1 --output=/dev/null -t1 --wrap $bin_hostname
expect {
	-re "Submitted batch job ($number)" {
		# Accepted by mistake: remove the stray job and record the failure
		set accepted_job_id $expect_out(1,string)
		cancel_job $accepted_job_id
		log_error "Batch request not rejected"
		set exit_code 1
		exp_continue
	}
	-re "error: " {
		log_debug "Error is expected, no worries"
		exp_continue
	}
	timeout {
		fail "sbatch not responding"
	}
	eof {
		wait
	}
}
111
#
# Inconsistent tasks per node: two tasks on one node at one GPU per task
# add up to two GPUs, but --gpus-per-node asks for only one
#
log_info "==== TEST 4 ===="
spawn $sbatch \
	--gpus-per-task=1 --gpus-per-node=1 -n2 \
	-N1 --output=/dev/null -t1 --wrap $bin_hostname
expect {
	-re "Submitted batch job ($number)" {
		# Accepted by mistake: remove the stray job and record the failure
		set accepted_job_id $expect_out(1,string)
		cancel_job $accepted_job_id
		log_error "Batch request not rejected"
		set exit_code 1
		exp_continue
	}
	-re "error: " {
		log_debug "Error is expected, no worries"
		exp_continue
	}
	timeout {
		fail "sbatch not responding"
	}
	eof {
		wait
	}
}
135
#
# Ask for GPUs per socket without giving a sockets-per-node count
#
log_info "==== TEST 5 ===="
spawn $sbatch \
	--gpus-per-socket=1 \
	-N1 --output=/dev/null -t1 --wrap $bin_hostname
expect {
	-re "Submitted batch job ($number)" {
		# Accepted by mistake: remove the stray job and record the failure
		set accepted_job_id $expect_out(1,string)
		cancel_job $accepted_job_id
		log_error "Batch request not rejected"
		set exit_code 1
		exp_continue
	}
	-re "error: " {
		log_debug "Error is expected, no worries"
		exp_continue
	}
	timeout {
		fail "sbatch not responding"
	}
	eof {
		wait
	}
}
159
#
# Inconsistent tasks per node (variation of test 4): one task per node at
# one GPU per task adds up to one GPU, but --gpus-per-node asks for two
#
log_info "==== TEST 6 ===="
spawn $sbatch \
	--gpus-per-task=1 --gpus-per-node=2 --ntasks-per-node=1 \
	-N1 --output=/dev/null -t1 --wrap $bin_hostname
expect {
	-re "Submitted batch job ($number)" {
		# Accepted by mistake: remove the stray job and record the failure
		set accepted_job_id $expect_out(1,string)
		cancel_job $accepted_job_id
		log_error "Batch request not rejected"
		set exit_code 1
		exp_continue
	}
	-re "error: " {
		log_debug "Error is expected, no worries"
		exp_continue
	}
	timeout {
		fail "sbatch not responding"
	}
	eof {
		wait
	}
}
# Turn any wrongly accepted submission recorded above into a test failure
if {$exit_code} {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}
186