#!/usr/bin/env expect
############################################################################
# Purpose: Test lua JobSubmitPlugin
############################################################################
# Copyright (C) 2019 SchedMD LLC
# Written by Nathan Rini
#
# This file is part of Slurm, a resource management program.
# For details, see <https://slurm.schedmd.com/>.
# Please also read the included file: DISCLAIMER.
#
# Slurm is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with Slurm; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
############################################################################
source ./globals
source ./globals_accounting

# Lua job_submit scripts that this test installs in turn.
set test_lua_reject "test$test_id\_scripts/reject.lua"
set test_lua_pass "test$test_id\_scripts/pass.lua"
set exit_code 0
# NOTE(review): cwd is not referenced anywhere in the visible part of this file.
set cwd "[$bin_pwd]"
set job_name "test$test_id"
# Set to 1 by the test procs when a subtest had to be skipped.
set is_skip 0

# Names of the accounting entities created for this test.
set ta1 "test$test_id-account.1"
set tu1 [get_my_user_name]
set tq1 "test$test_id-qos.1"
set tq2 "test$test_id-qos.2"

# account options
array set acct_1 {}
set acct_1(Organization) "Account_Org_A1"
set acct_1(Description) "Test_Account_A1"
set acct_1(Qos) $tq1
set acct_1(Cluster) [get_config_param "ClusterName"]

# user options
array set user_req_1 {}
set user_req_1(Account) $ta1
set user_req_1(Qos) "$tq1,$tq2"

# qos options
# qos_1 carries a one-CPU-per-user limit with denyonlimit; qos_2 has no limits.
array set qos_1 {}
set qos_1(Description) "test_qos_1"
set qos_1(flags) "denyonlimit"
set qos_1(maxtresperuser) "cpu=1"
array set qos_2 {}
set qos_2(Description) "test_qos_2"

set access_err 0

set timeout $max_job_delay

# Create the two QOS, the account and the user association used by the test.
# Any non-zero return code from the add_* helpers aborts the test via check_rc.
proc create_accounts {} {
    global ta1 acct_1 tq1 tq2 tu1 user_req_1 qos_1 qos_2
    log_info "create account and QOS"

    # Create test assoc and accounts
    check_rc [add_qos $tq1 [array get qos_1]]
    check_rc [add_qos $tq2 [array get qos_2]]
    check_rc [add_acct $ta1 [array get acct_1]]
    check_rc [add_user $tu1 [array get user_req_1]]
}

# Remove the QOS and the account created by create_accounts.
proc cleanup_accounts {} {
    global ta1 tq1 tq2

    #wait_for_account_done $ta1,$ta2

    log_info "remove QOS: $tq1, $tq2"
    remove_qos $tq1,$tq2

    log_info "remove account: $ta1"
    remove_acct "" $ta1
}

# Fail the test when exit_code is non-zero.
#
# exit_code: return code of a previously executed helper
proc check_rc { exit_code } {
    if {$exit_code != 0} {
        fail "Exiting with exit code $exit_code"
    }
}

# Shared implementation behind test_bin_pass and test_bin_fail.
#
# Spawns "$bin <cmd_args>" and counts the "initialized", "submit1", "submit2"
# and "submit3" messages printed by the client. cmd_args is substituted here
# by eval, which is why the globals it may reference are imported below.
#
# expect_success: 1 when the command must exit 0 and print plain messages,
#                 0 when it must exit non-zero and print "error: " messages
# num:            subtest label used in the log banner
# bin:            client command to spawn
# cmd_args:       unsubstituted argument list for the command
# het_job:        1 when cmd_args describes a heterogeneous job; then only
#                 messages carrying a "<number>: " prefix are counted
# count:          required number of submit1, submit2 and submit3 messages
#
# Returns 0 early (and sets is_skip) when a het job is refused under
# sched/builtin; otherwise fails the test on any unexpected outcome.
proc _run_submit_case { expect_success num bin cmd_args het_job count } {
    global job_name salloc ta1 tq1 tq2 srun number eol is_skip bin_true

    set got_init 0
    set got_sub1 0
    set got_sub2 0
    set got_sub3 0
    # Sentinel: stays at this value if the command never reaches eof.
    set rc -12345
    set is_builtin [string equal [get_config_param "SchedulerType"] "sched/builtin"]

    if {$expect_success} {
        set label "PASS"
        set prefix ""
    } else {
        set label "FAIL"
        set prefix "error: "
    }

    log_info "**** TEST $label $num ****"
    eval spawn $bin [join $cmd_args " "]
    set pid [exp_pid]

    # Example lines matched below:
    #   salloc: 0: submit1
    #   srun: error: submit1
    #   salloc: error: 0: submit1
    expect {
        -re "^(srun|salloc|sbatch): error: .*: Requested operation not supported on this system$eol" {
            if { $het_job && $is_builtin } {
                log_warn "Hetjobs are not supported with sched/builtin this failure is expected"
                set is_skip 1
                return 0
            }
        }
        -re "^(srun|salloc|sbatch): ${prefix}initialized$eol" {
            incr got_init
            exp_continue
        }
        -re "^(srun|salloc|sbatch): ${prefix}($number: |)submit1$eol" {
            if {!$het_job || $expect_out(2,string) != ""} {
                incr got_sub1
            }
            exp_continue
        }
        -re "^(srun|salloc|sbatch): ${prefix}($number: |)submit2$eol" {
            if {!$het_job || $expect_out(2,string) != ""} {
                incr got_sub2
            }
            exp_continue
        }
        -re "^(srun|salloc|sbatch): ${prefix}($number: |)submit3$eol" {
            if {!$het_job || $expect_out(2,string) != ""} {
                incr got_sub3
            }
            exp_continue
        }
        timeout {
            # The negated pid is passed on to slow_kill.
            slow_kill [expr {0 - $pid}]
            fail "$bin not responding"
        }
        eof {
            lassign [wait] pid spawnid os_error_flag rc
        }
    }

    log_info "$bin rc:$rc init:$got_init sub1:$got_sub1 sub2:$got_sub2 sub3:$got_sub3 expected:$count"

    # The exit status must agree with the expected outcome of the subtest.
    if {$expect_success} {
        set rc_ok [expr {$rc == 0}]
    } else {
        set rc_ok [expr {$rc != 0}]
    }

    if {!$rc_ok ||
        ($got_sub1 != $count) ||
        ($got_sub2 != $count) ||
        ($got_sub3 != $count)} {
        fail "Invalid $bin response"
    }
}

# Run a submission that must succeed: exit code 0 and exactly count plain
# submit1/submit2/submit3 messages.
#
# num:     subtest label
# bin:     client command to spawn
# args:    unsubstituted argument list (an ordinary parameter here, since it
#          is not the last one in the parameter list)
# het_job: 1 when the arguments describe a heterogeneous job
# count:   required number of each submitN message
proc test_bin_pass { num bin args het_job count } {
    return [_run_submit_case 1 $num $bin $args $het_job $count]
}

# Run a submission that must be rejected: non-zero exit code and exactly
# count "error: " submit1/submit2/submit3 messages.
#
# Parameters are the same as for test_bin_pass.
proc test_bin_fail { num bin args het_job count } {
    return [_run_submit_case 0 $num $bin $args $het_job $count]
}

if {![have_lua]} {
    skip "LUA must be installed and enabled to test lua job_submit plugin."
}

if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
    skip "This test can't be run without a usable AccountingStorageType"
}
if {![param_contains [get_config_param "AccountingStorageEnforce"] "limits"]} {
    skip "This test can't be run without enforcing limits"
}

# Verify cluster is able to run largest test job
set nodes [get_nodes_by_request "--ntasks-per-node=5 -N3"]
if { [llength $nodes] != 3 } {
    skip "System too small for test, it needs '--ntasks-per-node=5 -N3'"
}

# Undo everything this test changed: accounting entities first, then the
# saved configuration files.
# NOTE(review): presumably invoked by the test framework when the test ends
# or fails - confirm against ./globals.
proc cleanup {} {
    global config_dir

    cleanup_accounts

    # config_dir is only assigned after create_accounts has succeeded. If the
    # test stops before that point there is no saved configuration to restore,
    # and reading the variable would raise an error.
    if {![info exists config_dir]} {
        return
    }

    restore_conf $config_dir/job_submit.lua
    restore_conf $config_dir/slurm.conf
    reconfigure
}

# Start from a clean slate in case a previous run left entities behind.
cleanup_accounts
create_accounts

set config_dir [get_conf_path]
save_conf $config_dir/job_submit.lua
save_conf $config_dir/slurm.conf

# Activate lua plugin
# Comment out any existing JobSubmitPlugins line, then append our own.
exec $bin_sed -i {s/^\(JobSubmitPlugins\)/#\1/gI} $config_dir/slurm.conf
exec $bin_echo "\n### test7.20 additions####\nJobSubmitPlugins=lua" >> $config_dir/slurm.conf
reconfigure -fail

# Install the rejecting script. The rm is best-effort, the copy must succeed.
run_command "$bin_rm $config_dir/job_submit.lua"
run_command -fail "$bin_cp $test_lua_reject $config_dir/job_submit.lua"
# Sleep for 1 second to make sure that modify time is different from last copy.
# Pause, then stamp the freshly copied script with the current time.
sleep 1
file mtime $config_dir/job_submit.lua [timestamp]

# Check that all job types are rejected
# Each row is: label, client command, unsubstituted arguments, het_job flag,
# expected count of each submitN message.
foreach {t720_label t720_bin t720_args t720_het t720_count} [list \
    "R1" $salloc {-t1 -J $job_name -A $ta1 --qos $tq2 -n5 "$bin_true"} 0 1 \
    "R2" $salloc {-t1 -J $job_name -A $ta1 --qos $tq2 -n5 : -n3 : -n1 "$bin_true"} 1 1 \
    "R3" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq2 -n5 $bin_true} 0 1 \
    "R4" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq2 -n5 : -n3 : -n1 $bin_true} 1 1 \
    "R5" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 --wrap $bin_true} 0 1 \
    "R6" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 --array 10 --wrap $bin_true} 0 1 \
    "R7" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 : -n3 : -n1 --wrap $bin_true} 1 1 \
    "R8" $sbatch {-t1 -J $job_name --comment=PASS -o /dev/null -W -A $ta1 --qos $tq2 -n1 : --comment=ERROR -n3 : --comment=ERROR -n5 --wrap $bin_true} 1 2 \
] {
    test_bin_fail $t720_label $t720_bin $t720_args $t720_het $t720_count
}

# Swap in the accepting script for the next group of subtests.
run_command -fail "$bin_cp $test_lua_pass $config_dir/job_submit.lua"
# Sleep for 1 second to make sure that modify time is different from last copy.
# Pause, then stamp the freshly copied script with the current time.
sleep 1
file mtime $config_dir/job_submit.lua [timestamp]

# Check that passing works
# Each row is: label, client command, unsubstituted arguments, het_job flag,
# expected count of each submitN message.
foreach {t720_label t720_bin t720_args t720_het t720_count} [list \
    "P1" $salloc {-t1 -J $job_name -A $ta1 --qos $tq2 -n5 "$bin_true"} 0 1 \
    "P2" $salloc {-t1 -J $job_name -A $ta1 --qos $tq2 -n5 : -n3 : -n1 "$bin_true"} 1 3 \
    "P3" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq2 -n5 $bin_true} 0 1 \
    "P4" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq2 -n5 : -n3 : -n1 $bin_true} 1 3 \
    "P5" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 --wrap $bin_true} 0 1 \
    "P6" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 --array 10 --wrap $bin_true} 0 1 \
    "P7" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq2 -n5 : -n3 : -n1 --wrap $bin_true} 1 3 \
] {
    test_bin_pass $t720_label $t720_bin $t720_args $t720_het $t720_count
}

# Check that messages are still sent with failing QOS but passing filter
# These rows request $tq1 instead of $tq2 and are expected to fail.
foreach {t720_label t720_bin t720_args t720_het t720_count} [list \
    "F1" $salloc {-t1 -J $job_name -A $ta1 --qos $tq1 -n5 "$bin_true"} 0 1 \
    "F2" $salloc {-t1 -J $job_name -A $ta1 --qos $tq1 -n5 : -n3 : -n1 "$bin_true"} 1 1 \
    "F3" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq1 -n5 $bin_true} 0 1 \
    "F4" $srun {-t1 -J $job_name --mpi=none -A $ta1 --qos $tq1 -n5 : -n3 : -n1 $bin_true} 1 1 \
    "F5" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq1 -n5 --wrap $bin_true} 0 1 \
    "F6" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq1 -n5 --array 10 --wrap $bin_true} 0 1 \
    "F7" $sbatch {-t1 -J $job_name -o /dev/null -W -A $ta1 --qos $tq1 -n5 : -n3 : -n1 --wrap $bin_true} 1 3 \
] {
    test_bin_fail $t720_label $t720_bin $t720_args $t720_het $t720_count
}

# Report the overall outcome: a recorded error takes precedence over a skip.
if {$exit_code} {
    fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}
if {$is_skip} {
    skip "Some subtests were skipped"
}