1#!/usr/bin/env expect
2############################################################################
3# Purpose: Test of Slurm functionality
4# 	   sacctmgr usagefactor functionality
5############################################################################
6# Copyright (C) 2020 SchedMD LLC.
7# Written by Scott Jackson <scottmo@schedmd.com>
8#
9# This file is part of Slurm, a resource management program.
10# For details, see <https://slurm.schedmd.com/>.
11# Please also read the included file: DISCLAIMER.
12#
13# Slurm is free software; you can redistribute it and/or modify it under
14# the terms of the GNU General Public License as published by the Free
15# Software Foundation; either version 2 of the License, or (at your option)
16# any later version.
17#
18# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
19# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
21# details.
22#
23# You should have received a copy of the GNU General Public License along
24# with Slurm; if not, write to the Free Software Foundation, Inc.,
25# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
26############################################################################
27source ./globals_accounting
28
# Environment the test runs against, read from the live configuration
set cluster             [get_config_param "ClusterName"]
set partition           [default_partition]

# Names of the accounting entities created (and later removed) by this test
set qos_prefix          "${test_name}_qos_"
set test_acct           "${test_name}_acct"
set test_user           [get_my_user_name]

# Bookkeeping shared with the procs below
set access_err          0
set job_list            {}

# QOS table: name suffix -> QOS specification passed to add_qos.
# Each QOS differs only in its UsageFactor.
set qoses [dict create \
	high   {UsageFactor 2} \
	normal {UsageFactor 1} \
	low    {UsageFactor .5} \
	zero   {UsageFactor 0} \
]
43
#
# Prerequisites: skip the test unless accounting goes through slurmdbd,
# both "limits" and "qos" are enforced, and the invoking user is an
# accounting administrator.
#
if {[get_config_param "AccountingStorageType"] ne "accounting_storage/slurmdbd"} {
	# Message names the parameter exactly as it is spelled in slurm.conf
	skip "This test can't be run without AccountingStorageType=slurmdbd"
}
if {![param_contains [get_config_param "AccountingStorageEnforce"] "limits"]} {
	skip "This test can't be run without AccountingStorageEnforce=limits"
}
if {![param_contains [get_config_param "AccountingStorageEnforce"] "qos"]} {
	skip "This test can't be run without AccountingStorageEnforce=qos"
}
if {[get_admin_level] ne "Administrator"} {
	skip "This test can't be run without being an Accounting administrator"
}
56
# cleanup - undo everything this test may have created.
#
# Cancels the jobs recorded in the global job_list, removes the test
# account and every test QOS listed in the global qoses table, then
# reconfigures.
proc cleanup {} {
	global job_list qos_prefix qoses scancel test_acct

	log_debug "Cleaning up"

	# Jobs first, so nothing is still running under the account/QOS
	cancel_job $job_list

	# The test account
	remove_acct "" $test_acct

	# One QOS per entry of the qoses table; only the key is needed here
	dict for {qos_suffix unused_spec} $qoses {
		remove_qos "${qos_prefix}$qos_suffix"
	}

	# Reconfigure to restore the TRESBillingWeights
	reconfigure
}
77
#
# Remove leftovers of a previous run before creating anything
#
cleanup

#
# Create the test account on the cluster
#
log_debug "Adding account $test_acct"
if {[add_acct $test_acct [list cluster $cluster]]} {
	fail "Unable to create account $test_acct"
}

#
# Attach the current user to that account
#
log_debug "Adding user $test_user to account $test_acct"
if {[add_user $test_user [list cluster $cluster account $test_acct]]} {
	fail "Unable to add user $test_user to account $test_acct"
}

#
# Create each test qos and grant it to the user
#
foreach {key qos_spec} $qoses {
	set test_qos "${qos_prefix}$key"

	log_debug "Adding qos $test_qos"
	if {[add_qos $test_qos $qos_spec]} {
		fail "Unable to create qos $test_qos"
	}

	log_debug "Adding qos $test_qos to user $test_user"
	if {[mod_user $test_user [list cluster $cluster] [list qos +$test_qos] {}]} {
		fail "Unable to add qos $test_qos to user $test_user"
	}
}

#
# Blank out the partition's TRESBillingWeights so they do not throw off
# the test (cleanup reconfigures afterwards)
#
spawn $scontrol update partitionname=$partition TRESBillingWeights=
expect {
	eof {
		wait
	}
	timeout {
		fail "scontrol is not responding"
	}
	-re "error" {
		fail "Unable to reset TRESBillingWeights"
	}
}

#
# Start from zero recorded usage for the test account
#
if {[reset_account_usage "$cluster" "$test_acct"]} {
	fail "Unable to reset account usage"
}
138
#
# Test Usage Factor Function
#
# test_usage_factor qos usage_factor
#
#   qos          - name of the qos the job is submitted to
#   usage_factor - UsageFactor configured on that qos
#
# Submits a batch job to the qos, waits for it to run, reads the job's
# NumCPUs and compares usage_factor * time limit * NumCPUs against the
# parenthesised cpu value of GrpTRESRunMins reported by
# "scontrol show assoc_mgr". The job is cancelled afterwards.
#
proc test_usage_factor { qos usage_factor } {
	# job_list must be the global list, otherwise cleanup never sees the
	# job ids recorded here and cannot cancel a job left behind by a failure
	global sbatch scontrol test_acct number test_name job_list

	log_info "Testing for a usage factor of $usage_factor with qos $qos"

	#
	# Submit a short job to the specified qos
	#
	set job_time 2
	set script "\"sleep 10\""
	set command "$sbatch -J ${test_name} -t $job_time --account=$test_acct --qos=$qos --wrap $script"
	set observed_usage -1
	set job_id 0

	spawn {*}$command
	expect {
		-re "Submitted batch job (\\d+)" {
			set job_id $expect_out(1,string)
			lappend job_list $job_id
			exp_continue
		}
		timeout {
			fail "sbatch not responding"
		}
		eof {
			wait
		}
	}
	if {$job_id == 0} {
		fail "sbatch did not return a job id"
	}

	#
	# Wait for job to enter running state
	#
	if {[wait_for_job -pollinterval .1 $job_id "RUNNING"] != 0} {
		fail "Error waiting for job $job_id to be RUNNING"
	}

	#
	# Get the number of allocated CPUs of the job to compute the
	# expected_usage
	#
	# Start from 0 so a missing NumCPUs field is reported as a test
	# failure instead of a "no such variable" Tcl error further down
	set num_cpus 0
	spawn $scontrol show job $job_id
	expect {
		-re "NumCPUs=($number)" {
			set num_cpus $expect_out(1,string)
			exp_continue
		}
		timeout {
			fail "scontrol not responding"
		}
		eof {
			wait
		}
	}
	if {$num_cpus == 0} {
		fail "Unable to get NumCPUs of job $job_id"
	}
	# Braced so the operands are substituted once, by expr itself
	set expected_usage [expr {$usage_factor * $job_time * $num_cpus}]

	#
	# Verify that scontrol show assoc shows the appropriately scaled usage
	# for the specified qos
	#
	spawn $scontrol show assoc_mgr qos=$qos flags=qos
	expect {
		-re "GrpTRESRunMins=cpu=\[^\\(\]+\\((\[^\\(\]+)\\)" {
			set observed_usage $expect_out(1,string)
			exp_continue
		}
		timeout {
			fail "scontrol not responding"
		}
		eof {
			wait
		}
	}

	# A tolerance of -1 (sec) must be permitted because usage can be accrued
	# one second before the usage value can be returned.
	subtest [tolerance $expected_usage $observed_usage -1] "Check that usage for qos $qos is within tolerance (-1 sec)" "$observed_usage not in \[$expected_usage-1, $expected_usage\]"

	#
	# Cancel the job
	#
	cancel_job $job_id
}
227
# Run the usage factor check once per configured qos
foreach {key qos_spec} $qoses {
	set test_qos "${qos_prefix}$key"
	set UsageFactor [dict get $qos_spec UsageFactor]
	test_usage_factor $test_qos $UsageFactor
}
234