1#!/usr/bin/env expect
2############################################################################
3# Purpose: Test of Slurm functionality
4#          Basic sattach functionality test (--layout, --verbose, --label
5#          and --output-filter options).
6############################################################################
7# Copyright (C) 2002-2006 The Regents of the University of California.
8# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
9# Written by Morris Jette <jette1@llnl.gov>
10# CODE-OCEC-09-009. All rights reserved.
11#
12# This file is part of Slurm, a resource management program.
13# For details, see <https://slurm.schedmd.com/>.
14# Please also read the included file: DISCLAIMER.
15#
16# Slurm is free software; you can redistribute it and/or modify it under
17# the terms of the GNU General Public License as published by the Free
18# Software Foundation; either version 2 of the License, or (at your option)
19# any later version.
20#
21# Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
22# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
23# FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
24# details.
25#
26# You should have received a copy of the GNU General Public License along
27# with Slurm; if not, write to the Free Software Foundation, Inc.,
28# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
29############################################################################
30source ./globals
31
# Per-test state shared by the steps below.
set job_id      0                       ;# allocation id; stays 0 until salloc reports one
set matches     0                       ;# number of expected output lines seen so far
set exit_code   0                       ;# switched to 1 when a non-fatal check fails
set file_prog   "test${test_id}.prog"   ;# name of the compiled helper program
36
# cleanup - release everything this test created.
#
# Takes no arguments and returns nothing of interest. It cancels the job
# allocation recorded in the global job_id (when one was granted) and then
# removes the compiled helper program named by the global file_prog.
#
# NOTE(review): this script never calls cleanup itself; presumably the
# framework sourced from ./globals invokes it when the test ends (including
# via fail/skip) -- confirm against globals.
proc cleanup {} {
	global bin_rm file_prog job_id

	# Most failure paths in this test call fail while the salloc
	# allocation is still active. Cancel it here so that a failed run does
	# not leave the job behind until its time limit expires.
	if {[info exists job_id] && $job_id != 0} {
		cancel_job $job_id
	}

	exec $bin_rm -f $file_prog
}
42
# This test only applies when LaunchType is launch/slurm; skip otherwise.
if {![string equal [get_config_param "LaunchType"] "launch/slurm"]} {
	skip "This test is only compatible with systems using launch/slurm"
}

# Job shape: task_cnt tasks, with node_cnt handed to salloc -N.
set task_cnt 4
set node_cnt 1-4

#
# Remove any stale helper binary, compile a fresh one from its C source
# and set its mode to 700
#
exec $bin_rm -f $file_prog
exec $bin_cc -O -o $file_prog ${file_prog}.c
exec $bin_chmod 700 $file_prog
56
#
# Start the helper program under salloc/srun and wait until task_cnt
# WAITING lines have been seen
#
set timeout $max_job_delay
set salloc_pid [spawn $salloc -N $node_cnt -t2 $srun -n $task_cnt --overcommit ./$file_prog]
set init_id $spawn_id
expect {
	-i $init_id
	-re "Granted job allocation ($number)" {
		# Remember the allocation id; later steps attach to step 0 of it.
		set job_id $expect_out(1,string)
		exp_continue
	}
	-re "WAITING" {
		# Keep reading until the number of WAITING matches reaches
		# task_cnt, then fall out of the expect.
		incr matches
		if {$matches != $task_cnt} {
			exp_continue
		}
	}
	timeout {
		log_error "salloc not responding"
		cancel_job $job_id
		# slow_kill is handed the negated salloc pid.
		slow_kill [expr {0 - $salloc_pid}]
		set exit_code 1
	}
	eof {
		wait
	}
}

# Without an allocation id or a full set of WAITING lines there is nothing
# to attach to, so stop here.
if {$job_id == 0} {
	fail "Job submit failure"
}
if {$matches != $task_cnt} {
	fail "Job run time failure"
}
91
# Give the launch a moment to finish (srun must have received its
# RESPONSE_LAUNCH_TASKS message) before the first attach attempt
sleep 0.1

#
# Ask sattach for the task layout of step 0
#
set matches 0
spawn $sattach --layout ${job_id}.0
set attach_id $spawn_id
expect {
	-i $attach_id
	-re "($number) tasks, ($number) nodes" {
		# Count each layout summary line that is printed.
		incr matches
		exp_continue
	}
	timeout {
		fail "sattach not responding"
	}
	eof {
		wait
	}
}

# A missing summary line is recorded but does not stop the test.
if {$matches < 1} {
	log_error "Layout information not printed"
	set exit_code 1
}
119
#
# Attach to the initial program again, asking for the output of a single
# task only (the highest-numbered one)
#
set matches     0
set timeout     10
set last_task   [expr {$task_cnt - 1}]
set attach_pid [spawn $sattach -l --output-filter=$last_task ${job_id}.0]
set attach_id $spawn_id
expect {
	-i $attach_id
	-re "($number): WAITING" {
		if {$expect_out(1,string) == $last_task} {
			# The labelled line came from the requested task; this
			# sattach has served its purpose, so kill it.
			incr matches
			exec $bin_kill -KILL $attach_pid
		} else {
			# Output from any other task means the filter leaked.
			log_error "Output filtering by task failed"
			set exit_code 1
		}
		exp_continue
	}
	timeout {
		fail "sattach not responding"
	}
	eof {
		wait
	}
}

# No line from the requested task is recorded as a non-fatal failure.
if {$matches < 1} {
	log_error "Failed to filter task output"
	set exit_code 1
}
150
#
# Attach once more with verbose logging and task labels, then tell the
# program to exit
#
set matches     0
set timeout     10
# Expected matches: one "verbose" line plus task_cnt labelled WAITING lines.
set want_matches [expr {$task_cnt + 1}]
spawn $sattach -vv -l ${job_id}.0
set attach_id $spawn_id
expect {
	-i $attach_id
	-re "verbose *: 2" {
		# -vv should be reported back as verbosity level 2.
		incr matches
		exp_continue
	}
	-re "($number): WAITING" {
		incr matches
		# Once the expected number of matches is reached, send "exit"
		# through sattach.
		if {$matches == $want_matches} {
			send -i $attach_id "exit\r"
		}
		exp_continue
	}
	timeout {
		fail "sattach not responding"
	}
	eof {
		wait
	}
}

# Any other match count is recorded as a non-fatal failure.
if {$matches != $want_matches} {
	log_error "Job run time failure ($matches != [expr $task_cnt + 1])"
	set exit_code 1
}
182
#
# The original salloc/srun session must come to an end as well
#
# Point spawn_id back at that session so the bare expect/wait below act on it
set spawn_id $init_id
expect {
	eof {
		wait
	}
	timeout {
		fail "srun (terminate) not responding"
	}
}

# Turn any non-fatal errors recorded along the way into a test failure,
# cancelling the job first.
if {$exit_code} {
	cancel_job $job_id
	fail "Test failed due to previous errors (\$exit_code = $exit_code)"
}
201