1#!/usr/bin/env expect
2############################################################################
3# Purpose:  Test lua JobSubmitPlugin
4############################################################################
5# Copyright (C) 2019 SchedMD LLC
6# Written by Nathan Rini
7#
8# This file is part of Slurm, a resource management program.
9# For details, see <https://slurm.schedmd.com/>.
10# Please also read the included file: DISCLAIMER.
11#
12# Slurm is free software; you can redistribute it and/or modify it under
13# the terms of the GNU General Public License as published by the Free
14# Software Foundation; either version 2 of the License, or (at your option)
15# any later version.
16#
17# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
18# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
20# details.
21#
22# You should have received a copy of the GNU General Public License along
23# with Slurm; if not, write to the Free Software Foundation, Inc.,
24# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
25############################################################################
26source ./globals
27source ./globals_accounting
28
29set test_lua_reject	"test$test_id\_scripts/reject.lua"
30set test_lua_pass	"test$test_id\_scripts/pass.lua"
31set exit_code		 0
32set cwd			"[$bin_pwd]"
33set job_name		"test$test_id"
34set is_skip              0
35
36set ta1	    "test$test_id-account.1"
37set tu1     [get_my_user_name]
38set tq1     "test$test_id-qos.1"
39set tq2     "test$test_id-qos.2"
40
41# account options
42array set acct_1 {}
43set acct_1(Organization)  "Account_Org_A1"
44set acct_1(Description)   "Test_Account_A1"
45set acct_1(Qos) $tq1
46set acct_1(Cluster) [get_config_param "ClusterName"]
47
48# user options
49array set user_req_1 {}
50set user_req_1(Account) $ta1
51set user_req_1(Qos) "$tq1,$tq2"
52
53# qos options
54array set qos_1 {}
55set qos_1(Description)	  "test_qos_1"
56set qos_1(flags)          "denyonlimit"
57set qos_1(maxtresperuser) "cpu=1"
58array set qos_2 {}
59set qos_2(Description)    "test_qos_2"
60
61set access_err  0
62
63set timeout $max_job_delay
64
65# Create test assoc and accounts
66proc create_accounts {} {
67	global ta1 acct_1 tq1 tq2 tu1 user_req_1 qos_1 qos_2
68	log_info "create account and QOS"
69
70	# Create test assoc and accounts
71	check_rc [add_qos $tq1 [array get qos_1]]
72	check_rc [add_qos $tq2 [array get qos_2]]
73	check_rc [add_acct $ta1 [array get acct_1]]
74	check_rc [add_user $tu1 [array get user_req_1]]
75}
76
77# Cleanup test assoc and accounts
78proc cleanup_accounts {} {
79	global ta1 tq1 tq2
80
81	#wait_for_account_done $ta1,$ta2
82
83	log_info "remove QOS: $tq1, $tq2"
84	remove_qos $tq1,$tq2
85
86	log_info "remove account: $ta1"
87	remove_acct "" $ta1
88}
89
90proc check_rc { exit_code } {
91	if {$exit_code != 0} {
92		fail "Exiting with exit code $exit_code"
93	}
94}
95
96proc test_bin_pass { num bin args het_job count } {
97	global job_name salloc ta1 tq2 srun number eol is_skip bin_true
98
99	set got_init 0
100	set got_sub1 0
101	set got_sub2 0
102	set got_sub3 0
103	set rc -12345
104	set is_builtin [string equal [get_config_param "SchedulerType"] "sched/builtin"]
105
106	log_info "**** TEST PASS $num ****"
107	eval spawn $bin [join $args " "]
108	set pid [exp_pid]
109	expect {
110		-re "^(srun|salloc|sbatch): error: .*: Requested operation not supported on this system$eol" {
111			if { $het_job && $is_builtin } {
112				log_warn "Hetjobs are not supported with sched/builtin this failure is expected"
113				set is_skip 1
114				return 0
115			}
116		}
117		-re "^(srun|salloc|sbatch): initialized$eol" {
118			incr got_init
119			exp_continue
120		}
121		# salloc: 0: submit1
122		-re "^(srun|salloc|sbatch): ($number: |)submit1$eol" {
123			if {!$het_job || $expect_out(2,string) != ""} {
124				incr got_sub1
125			}
126			exp_continue
127		}
128		-re "^(srun|salloc|sbatch): ($number: |)submit2$eol" {
129			if {!$het_job || $expect_out(2,string) != ""} {
130				incr got_sub2
131			}
132			exp_continue
133		}
134		-re "^(srun|salloc|sbatch): ($number: |)submit3$eol" {
135			if {!$het_job || $expect_out(2,string) != ""} {
136				incr got_sub3
137			}
138			exp_continue
139		}
140		timeout {
141			slow_kill [expr 0 - $pid]
142			fail "$bin not responding"
143		}
144		eof {
145			lassign [wait] pid spawnid os_error_flag rc
146		}
147	}
148
149	log_info "$bin rc:$rc init:$got_init sub1:$got_sub1 sub2:$got_sub2 sub3:$got_sub3 expected:$count"
150
151	if {($rc != 0) ||
152	    ($got_sub1 != $count) ||
153	    ($got_sub2 != $count) ||
154	    ($got_sub3 != $count)} {
155			fail "Invalid $bin response"
156	}
157}
158
159proc test_bin_fail { num bin args het_job count } {
160	global job_name ta1 tq1 tq2 srun number eol is_skip bin_true
161
162	set got_init 0
163	set got_sub1 0
164	set got_sub2 0
165	set got_sub3 0
166	set rc -12345
167	set is_builtin [string equal [get_config_param "SchedulerType"] "sched/builtin"]
168
169	log_info "**** TEST FAIL $num ****"
170	eval spawn $bin [join $args " "]
171	set pid [exp_pid]
172	expect {
173		-re "^(srun|salloc|sbatch): error: .*: Requested operation not supported on this system$eol" {
174			if { $het_job && $is_builtin } {
175				log_warn "Hetjobs are not supported with sched/builtin this failure is expected"
176				set is_skip 1
177				return 0
178			}
179		}
180		-re "^(srun|salloc|sbatch): error: initialized$eol" {
181			incr got_init
182			exp_continue
183		}
184		#srun: error: submit1\r\n
185		#salloc: error: 0: submit1\r\n
186		-re "^(srun|salloc|sbatch): error: ($number: |)submit1$eol" {
187			if {!$het_job || $expect_out(2,string) != ""} {
188				incr got_sub1
189			}
190			exp_continue
191		}
192		-re "^(srun|salloc|sbatch): error: ($number: |)submit2$eol" {
193			if {!$het_job || $expect_out(2,string) != ""} {
194				incr got_sub2
195			}
196			exp_continue
197		}
198		-re "^(srun|salloc|sbatch): error: ($number: |)submit3$eol" {
199			if {!$het_job || $expect_out(2,string) != ""} {
200				incr got_sub3
201			}
202			exp_continue
203		}
204		timeout {
205			slow_kill [expr 0 - $pid]
206			fail "$bin not responding"
207		}
208		eof {
209			lassign [wait] pid spawnid os_error_flag rc
210		}
211	}
212
213	log_info "$bin rc:$rc init:$got_init sub1:$got_sub1 sub2:$got_sub2 sub3:$got_sub3 expected:$count"
214
215	if {($rc == 0) ||
216	    ($got_sub1 != $count) ||
217	    ($got_sub2 != $count) ||
218	    ($got_sub3 != $count)} {
219		fail "Invalid $bin response"
220	}
221}
222
223if {![have_lua]} {
224	skip "LUA must be installed and enabled to test lua job_submit plugin."
225}
226
227if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
228	skip "This test can't be run without a usable AccountStorageType"
229}
230if {![param_contains [get_config_param "AccountingStorageEnforce"] "limits"]} {
231	skip "This test can't be run without enforcing limits"
232}
233
234# Verify cluster is able to run largest test job
235set nodes [get_nodes_by_request "--ntasks-per-node=5 -N3"]
236if { [llength $nodes] != 3 } {
237	skip "System too small for test, it needs '--ntasks-per-node=5 -N3'"
238}
239
240proc cleanup {} {
241	global config_dir
242
243	cleanup_accounts
244	restore_conf $config_dir/job_submit.lua
245	restore_conf $config_dir/slurm.conf
246	reconfigure
247}
248
249cleanup_accounts
250create_accounts
251
252set config_dir [get_conf_path]
253save_conf $config_dir/job_submit.lua
254save_conf $config_dir/slurm.conf
255
256# Activate lua plugin
257exec $bin_sed -i {s/^\(JobSubmitPlugins\)/#\1/gI} $config_dir/slurm.conf
258exec $bin_echo "\n### test7.20 additions####\nJobSubmitPlugins=lua" >> $config_dir/slurm.conf
259reconfigure -fail
260
261run_command "$bin_rm $config_dir/job_submit.lua"
262run_command -fail "$bin_cp $test_lua_reject $config_dir/job_submit.lua"
263# Sleep for 1 second to make sure that modify time is different from last copy.
264sleep 1
265file mtime $config_dir/job_submit.lua [timestamp]
266
267# Check that all job types are rejected
268test_bin_fail "R1" $salloc {-t1 -J $job_name -A $ta1 --qos $tq2 -n5 "$bin_true"} 0 1
269test_bin_fail "R2" $salloc {-t1 -J $job_name -A $ta1 --qos $tq2 -n5 : -n3 : -n1 "$bin_true"} 1 1
270test_bin_fail "R3" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq2 -n5 $bin_true} 0 1
271test_bin_fail "R4" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq2 -n5 : -n3 : -n1 $bin_true} 1 1
272test_bin_fail "R5" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 --wrap $bin_true} 0 1
273test_bin_fail "R6" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 --array 10 --wrap $bin_true} 0 1
274test_bin_fail "R7" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 : -n3 : -n1 --wrap $bin_true} 1 1
275test_bin_fail "R8" $sbatch {-t1 -J $job_name --comment=PASS -o /dev/null -W -A $ta1 --qos $tq2 -n1 : --comment=ERROR -n3 : --comment=ERROR -n5 --wrap $bin_true} 1 2
276
277run_command -fail "$bin_cp $test_lua_pass $config_dir/job_submit.lua"
278# Sleep for 1 second to make sure that modify time is different from last copy.
279sleep 1
280file mtime $config_dir/job_submit.lua [timestamp]
281
282# Check that passing works
283test_bin_pass "P1" $salloc {-t1 -J $job_name -A $ta1 --qos $tq2 -n5 "$bin_true"} 0 1
284test_bin_pass "P2" $salloc {-t1 -J $job_name -A $ta1 --qos $tq2 -n5 : -n3 : -n1 "$bin_true"} 1 3
285test_bin_pass "P3" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq2 -n5 $bin_true} 0 1
286test_bin_pass "P4" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq2 -n5 : -n3 : -n1 $bin_true} 1 3
287test_bin_pass "P5" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 --wrap $bin_true} 0 1
288test_bin_pass "P6" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 --array 10 --wrap $bin_true} 0 1
289test_bin_pass "P7" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 : -n3 : -n1 --wrap $bin_true} 1 3
290
291# Check that messages are still sent with failing QOS but passing filter
292test_bin_fail "F1" $salloc {-t1 -J $job_name -A $ta1 --qos $tq1 -n5 "$bin_true"} 0 1
293test_bin_fail "F2" $salloc {-t1 -J $job_name -A $ta1 --qos $tq1 -n5 : -n3 : -n1 "$bin_true"} 1 1
294test_bin_fail "F3" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq1 -n5 $bin_true} 0 1
295test_bin_fail "F4" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq1 -n5 : -n3 : -n1 $bin_true} 1 1
296test_bin_fail "F5" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq1 -n5 --wrap $bin_true} 0 1
297test_bin_fail "F6" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq1 -n5 --array 10 --wrap $bin_true} 0 1
298test_bin_fail "F7" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq1 -n5 : -n3 : -n1 --wrap $bin_true} 1 3
299
300if {$exit_code} {
301	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
302}
303if {$is_skip} {
304	skip "Some subtests were skipped"
305}
306