1#!/usr/bin/env expect
2############################################################################
3# Purpose: Test of Slurm functionality
4#          Test of sinfo cpu total and allocated
5############################################################################
6# Copyright (C) 2009 Lawrence Livermore National Security.
7# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
8# Written by Joseph Donaghy <donaghy1@llnl.gov>
9# CODE-OCEC-09-009. All rights reserved.
10#
11# This file is part of Slurm, a resource management program.
12# For details, see <https://slurm.schedmd.com/>.
13# Please also read the included file: DISCLAIMER.
14#
15# Slurm is free software; you can redistribute it and/or modify it under
16# the terms of the GNU General Public License as published by the Free
17# Software Foundation; either version 2 of the License, or (at your option)
18# any later version.
19#
20# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
21# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
22# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
23# details.
24#
25# You should have received a copy of the GNU General Public License along
26# with Slurm; if not, write to the Free Software Foundation, Inc.,
27# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
28############################################################################
29source ./globals
30
# Overall result of the test; a non-zero value makes the script fail at
# the end.
set exit_code   0

# Helper file name derived from the test id.
set file_in     "test${test_id}.in"

# Idle node bookkeeping.
set inode_name  ""
set inode_found 0

# CPU / node counters.
set aprocsc     0
set aprocsi     0
set pnumsc      0
set pnumsi      0

# Allocation sizing and select plugin information.
set smallest    1
set select_type ""
set layout "static"
42
# Check that "scontrol show node" reports the expected number of allocated
# CPUs for a node.
#
# The command is typed into the interactive session owned by the caller, so
# spawn_id and mypid are linked to the caller's variables of the same name.
#
# Arguments:
#   node     - node to query
#   proc_cnt - CPU count expected in the CPUAlloc field
#
# Returns 0 when CPUAlloc was seen and matched proc_cnt, 1 otherwise
# (mismatch, no CPUAlloc field in the output, or timeout).
proc scontrol_test { node proc_cnt } {
	global scontrol number test_prompt
	upvar spawn_id spawn_id
	# The timeout branch kills the caller's spawned process. Without this
	# link mypid would be an undefined local and reading it would raise a
	# Tcl error instead of returning 1.
	upvar mypid mypid

	set found 0
	set rc 0

	send "$scontrol show node $node\r"
	expect {
		-re "CPUAlloc=($number)" {
			set num_alloc $expect_out(1,string)
			set found 1
			if {$proc_cnt != $num_alloc} {
				log_error "Requested $proc_cnt but got $num_alloc instead"
				set rc 1
			}
			exp_continue
		}
		-re $test_prompt {
		}
		timeout {
			log_error "scontrol not responding"
			slow_kill $mypid
			return 1
		}
		eof {
			wait
		}
	}

	if {!$found} {
		log_error "Didn't get expected output from scontrol"
		set rc 1
	}

	return $rc
}
80
# Check the CPU counts that sinfo reports for a node.
#
# Runs sinfo with the "%C %A %N" format in the caller's interactive session
# and compares the allocated, idle and total CPU fields of the first column
# with the expected values. A trailing K on a field scales it by 1024.
# spawn_id and mypid are linked to the caller's variables of the same name.
#
# Arguments:
#   node        - node to query
#   proc_cnt    - expected allocated CPU count
#   total_procs - expected total CPU count
#   idle_cpus   - expected idle CPU count
#
# Returns 0 when the line was found and all three counts matched, 1 otherwise
# (mismatch, no matching output, or timeout).
proc sinfo_test_1 { node proc_cnt total_procs idle_cpus } {
	global float number test_prompt sinfo slash
	upvar spawn_id spawn_id
	# Needed by the timeout branch; without this link mypid would be an
	# undefined local and reading it would raise a Tcl error.
	upvar mypid mypid

	set found 0
	set rc 0

	send "$sinfo -o \"%C %A %N\" -h -n $node\r"
	expect {
		-re "($float)(K?)($slash)($float)(K?)($slash)($float)(K?)($slash)($float)(K?) ($number)($slash)($number) $node" {
			set found 1
			# Groups 1/4/10 are allocated/idle/total; the group that
			# follows each one holds the optional K suffix. The third
			# field (group 7) is not checked.
			set num_alloc $expect_out(1,string)
			if {[string compare $expect_out(2,string) ""]} {
				set num_alloc [expr {$num_alloc * 1024}]
			}
			set num_idle $expect_out(4,string)
			if {[string compare $expect_out(5,string) ""]} {
				set num_idle [expr {$num_idle * 1024}]
			}
			set num_total $expect_out(10,string)
			if {[string compare $expect_out(11,string) ""]} {
				set num_total [expr {$num_total * 1024}]
			}

			if { $num_alloc != $proc_cnt } {
				log_error "sinfo 1 allocated cpus wrong, got $num_alloc but needed $proc_cnt"
				set rc 1
			} elseif { $num_idle != $idle_cpus } {
				log_error "sinfo 1 idle cpus wrong, got $num_idle but needed $idle_cpus"
				set rc 1
			} elseif { $num_total != $total_procs } {
				log_error "sinfo 1 total cpus wrong, got $num_total but needed $total_procs"
				set rc 1
			}
			exp_continue
		}
		-re $test_prompt {
		}
		timeout {
			log_error "sinfo not responding"
			slow_kill $mypid
			return 1
		}
		eof {
			wait
		}
	}

	if {!$found} {
		log_error "Didn't get expected output from sinfo"
		set rc 1
	}

	return $rc
}
140
# Check the node counts that sinfo reports for a single node.
#
# Runs sinfo with the "%t %D %N" format in the caller's interactive session.
# A line in the alloc or mix state supplies the allocated node count and a
# line in the idle state supplies the idle node count. Exactly one allocated
# node and no idle node are expected. A trailing K scales a count by 1024.
# spawn_id and mypid are linked to the caller's variables of the same name.
#
# Arguments:
#   node        - node to query
#   proc_cnt    - unused, kept for interface compatibility
#   total_procs - unused, kept for interface compatibility
#
# Returns 0 when both counts matched, 1 on mismatch or timeout.
proc sinfo_test_2 { node proc_cnt total_procs } {
	global sinfo number test_prompt
	upvar spawn_id spawn_id
	# Needed by the timeout branch; without this link mypid would be an
	# undefined local and reading it would raise a Tcl error.
	upvar mypid mypid

	set rc 0
	set num_alloc 0
	set num_idle 0
	set alloc_nodes 1
	set total_nodes 1

	set idle_nodes [expr {$total_nodes - $alloc_nodes}]

	send "$sinfo -o \"%t %D %N\" -h -n $node\r"
	expect {
		-re "alloc ($number)(K?) $node" {
			set num_alloc $expect_out(1,string)
			if {[string compare $expect_out(2,string) ""]} {
				# Scale the value just parsed; the original code
				# read inode_procs, which is not visible in this
				# proc and is a CPU count rather than a node count.
				set num_alloc [expr {$num_alloc * 1024}]
			}
			exp_continue
		}
		-re "idle ($number)(K?) $node" {
			set num_idle $expect_out(1,string)
			if {[string compare $expect_out(2,string) ""]} {
				set num_idle [expr {$num_idle * 1024}]
			}
			exp_continue
		}
		-re "mix ($number)(K?) $node" {
			set num_alloc $expect_out(1,string)
			if {[string compare $expect_out(2,string) ""]} {
				set num_alloc [expr {$num_alloc * 1024}]
			}
			exp_continue
		}
		-re $test_prompt {
		}
		timeout {
			log_error "sinfo not responding"
			slow_kill $mypid
			return 1
		}
		eof {
			wait
		}
	}

	if { $num_alloc != $alloc_nodes } {
		log_error "sinfo 2 allocated nodes wrong, got $num_alloc but needed $alloc_nodes"
		set rc 1
	} elseif { $num_idle != $idle_nodes } {
		log_error "sinfo 2 idle nodes wrong, got $num_idle but needed $idle_nodes"
		set rc 1
	}

	return $rc
}
198
# Allocate proc_cnt CPUs on one node with salloc, check what scontrol and
# sinfo report while the allocation is held, then quit the allocation.
#
# salloc runs a small generated script that starts an interactive bash.
# The checks are typed into that shell once its prompt appears.
#
# Arguments:
#   node        - node to allocate
#   proc_cnt    - number of CPUs to request
#   total_procs - total number of CPUs on the node
#
# Returns 0 when every check passed, 1 when any check failed. Calls fail
# if salloc stops responding.
proc allocate_and_quit { node proc_cnt total_procs } {
	global bin_bash salloc scontrol sinfo number re_word_str
	global test_prompt select_type procs_per_node test_id bin_rm
	global reset_bash_prompt

	set file_in "test$test_id.input"
	set job_id 0
	set rc 0
	set timeout 60
	set idle_cpus [expr {$total_procs - $proc_cnt}]

	make_bash_script $file_in "
	  $reset_bash_prompt
	  $bin_bash --norc
	"

	set mypid [spawn $salloc -w $node -N 1 -t 5 -n $proc_cnt ./$file_in]
	expect {
		-re "Granted job allocation ($number)" {
			set job_id $expect_out(1,string)
			exp_continue
		}

		-re $test_prompt {
			# Run the checks in order and stop at the first failure:
			# scontrol CPU count, sinfo CPU counts, sinfo node counts.
			if {[scontrol_test $node $proc_cnt] ||
			    [sinfo_test_1 $node $proc_cnt $total_procs $idle_cpus] ||
			    [sinfo_test_2 $node $proc_cnt $total_procs]} {
				set rc 1
			}
			# Leave the shell on success and on failure alike, then keep
			# reading until eof so the process is reaped and the helper
			# script below is always removed.
			send "exit\r"
			exp_continue
		}

		-re "Unable to contact" {
			log_error "Slurm appears to be down"
			exp_continue
		}
		timeout {
			cancel_job $job_id
			slow_kill $mypid
			fail "salloc not responding"
		}
		eof {
			wait
		}
	}
	exec $bin_rm -f $file_in

	return $rc
}
264
265############################################################################
266# test starts here
267############################################################################
268
# Skip on configurations that use CR_ONE_TASK_PER_CORE.
set select_type_parameters [get_config_param "SelectTypeParameters"]
if {[param_contains $select_type_parameters "CR_ONE_TASK_PER_CORE"]} {
	skip "This test is incompatible SelectTypeParameters=CR_ONE_TASK_PER_CORE"
}

# find the default partition
set def_part [default_partition]

# Find the nodes in the default partition. Only node lists written as a
# name followed by a bracketed numeric range are recognised here.
set def_node_name ""
spawn $sinfo -h -o "=%N=" -p $def_part
expect {
	-re "=($re_word_str).($number)-($number).=" {
		set def_node_name $expect_out(1,string)
		exp_continue
	}
	timeout {
		fail "sinfo not responding"
	}
	eof {
		wait
	}
}
# Without the timeout branch above, an unresponsive sinfo would fall
# through to this check and be misreported as an unsupported name format.
if {[string compare $def_node_name ""] == 0} {
	skip "Node name format not supported for this test"
}
292
# Read the default partition's record: remember its node list expression and
# whether the partition is configured with OverSubscribe=EXCLUSIVE.
# Output of the spawned commands is not echoed while log_user is 0.
log_user 0
set def_hostlist ""
set part_exclusive 0
spawn $scontrol show part $def_part
expect {
	-re " Nodes=($re_word_str)"  {
		set def_hostlist $expect_out(1,string)
		exp_continue
	}
	-re " BasePartitions=($re_word_str)" {
		# A BasePartitions value overwrites any host list seen so far.
		set def_hostlist $expect_out(1,string)
		exp_continue
	}
	-re " OverSubscribe=EXCLUSIVE" {
		set part_exclusive 1
		exp_continue
	}
	timeout {
		fail "scontrol not responding"
	}
	eof {
		wait
	}
}
# Expand the host list expression; every word printed by scontrol is stored
# in the host_name array and counted in host_cnt.
set host_cnt 0
spawn $scontrol show hostnames $def_hostlist
expect {
	-re "($re_word_str)"  {
		set host_name($host_cnt) $expect_out(1,string)
		incr host_cnt
		exp_continue
	}
	timeout {
		fail "scontrol not responding"
	}
	eof {
		wait
	}
}
log_user 1
# Nothing can be tested if the expansion produced no host names.
if {$host_cnt == 0} {
	fail "Could not find any nodes in default partition"
}
336
# Find an idle node in the default partition and record its CPU layout.
log_user 0
set inode_name ""
set inode_cores_per_socket 0
set inode_procs 0
set units ""
set inode_sockets 0
set inode_threads_per_core 0

# Read one node record per line and stop at the first node whose state is
# IDLE, optionally followed by one character and CLOUD. The node name is
# captured up to the next whitespace so that names containing characters
# such as "-" or "." are not truncated.
set fd [open "|$scontrol --oneliner show node $def_hostlist"]
while {[gets $fd line] != -1} {
	if {[regexp {NodeName=(\S+).*CoresPerSocket=(\d+).*CPUTot=(\d+)(K?).*Sockets=(\d+).*State=IDLE(?:.CLOUD)? ThreadsPerCore=(\d+)} $line frag inode_name inode_cores_per_socket inode_procs units inode_sockets inode_threads_per_core]} {
		break
	}
}
# The pipe may be closed before scontrol has finished writing, so any error
# reported by close is ignored.
catch {close $fd}

# A trailing K on CPUTot means the count is expressed in units of 1024.
if {[string compare $units ""]} {
	set inode_procs [expr {$inode_procs * 1024}]
}

log_user 1

if {!$inode_procs} {
	fail "Couldn't find an idle node in the default partition"
}

log_debug "Found idle node $inode_name with $inode_procs processors"
369
# Work out the smallest CPU count to request on the idle node, based on the
# select plugin, partition exclusivity and SelectTypeParameters.
if {[check_config_select "linear"]} {
	set smallest $inode_procs
} else {
	if {$part_exclusive == 1} {
		# An exclusive partition always hands out the whole node.
		set smallest $inode_procs
	} elseif {[param_contains $select_type_parameters "CR_CPU*"]} {
		set smallest $inode_threads_per_core
	} elseif {[param_contains $select_type_parameters "NONE"]} {
		set smallest $inode_threads_per_core
	} elseif {[param_contains $select_type_parameters "CR_CORE*"]} {
		set smallest $inode_threads_per_core
	} elseif {[param_contains $select_type_parameters "CR_SOCKET*"]} {
		set smallest [expr {$inode_cores_per_socket * $inode_threads_per_core}]
	} else {
		# Report the value that was actually examined; the original
		# message read an undefined variable and raised a Tcl error.
		log_warn "Failed to parse SelectTypeParameters '$select_type_parameters'"
		set smallest $inode_procs
	}
}
389
# First pass: request the smallest possible allocation on the idle node.
set exit_code [allocate_and_quit $inode_name $smallest $inode_procs]

# Second pass: request the whole node, but only if the first pass succeeded
# and did not already cover the whole node.
if {$exit_code == 0} {
	if {$smallest != $inode_procs} {
		# Short pause so the node state can settle between the two runs.
		sleep 1
		set exit_code [allocate_and_quit $inode_name $inode_procs $inode_procs]
	}
}

if {$exit_code} {
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}
399