1#!/bin/ksh -p
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or https://opensource.org/licenses/CDDL-1.0.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21
22#
23# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
24#
25
26#
27# DESCRIPTION:
28# Test /proc/spl/kstat/zfs/<pool>/state kstat
29#
30# STRATEGY:
31# 1. Create a mirrored pool
32# 2. Check that pool is ONLINE
33# 3. Fault one disk
34# 4. Check that pool is DEGRADED
35# 5. Create a new pool with a single scsi_debug disk
36# 6. Remove the disk
37# 7. Check that pool is SUSPENDED
38# 8. Add the disk back in
39# 9. Clear errors and destroy the pools
40
# Pull in the common ZFS test-suite helper functions.
. "$STF_SUITE"/include/libtest.shlib

verify_runnable "both"
44
# Undo everything the scsi_debug half of the test may have done.  Each step
# is conditional on the state variables set by the main body, so cleanup is
# safe to run no matter how far the test progressed before exiting.
function cleanup
{
	# Destroy the scsi_debug pool
	if [ -n "$TESTPOOL2" ] ; then
		if [ -n "$host" ] ; then
			# Re-enable the disk
			scan_scsi_hosts "$host"

			# Device may have changed names after being inserted
			SDISK=$(get_debug_device)
			log_must ln "$DEV_RDSKDIR/$SDISK" "$REALDISK"
		fi

		# Restore our working pool image.  Check the restore
		# explicitly: a silently failed gunzip would leave a corrupt
		# image and the 'zpool clear' below could hang forever.
		if [ -n "$BACKUP" ] ; then
			log_must eval "gunzip -c '$BACKUP' > '$REALDISK'"
			log_must rm -f "$BACKUP"
		fi

		if poolexists $TESTPOOL2 ; then
			# Our disk is back.  Now we can clear errors and
			# destroy the pool cleanly.
			log_must zpool clear $TESTPOOL2

			# Now that the disk is back and errors cleared, wait
			# for our hung 'zpool scrub' to finish.
			wait

			destroy_pool $TESTPOOL2
		fi
		log_must rm -f "$REALDISK"
		unload_scsi_debug
	fi
}
79
# Check that our pool state values match what's expected
#
# $1: pool name
# $2: expected state ("ONLINE", "DEGRADED", "SUSPENDED", etc)
#
# Returns 0 if 'zpool status', 'zpool list', and the state kstat all report
# the expected state, non-zero otherwise.
function check_all
{
	typeset pool="$1"
	typeset expected="$2"
	typeset state1 state2 state3

	# State as reported by 'zpool status'
	state1=$(zpool status $pool | awk '/state: /{print $2}')
	# State as reported by 'zpool list'
	state2=$(zpool list -H -o health $pool)
	# State as reported by the kstat under test
	state3=$(</proc/spl/kstat/zfs/$pool/state)
	log_note "Checking $expected = $state1 = $state2 = $state3"

	# The comparison itself is the function's result; no need to wrap it
	# in an if/true/false ladder.
	[[ "$expected" == "$state1" && "$expected" == "$state2" && \
	    "$expected" == "$state3" ]]
}
100
log_onexit cleanup

log_assert "Testing /proc/spl/kstat/zfs/<pool>/state kstat"

# Test that the initial pool is healthy.  Wrap check_all in log_must so a
# state mismatch actually fails the test (matches the SUSPENDED check below).
log_must check_all $TESTPOOL "ONLINE"

# Fault one of the disks, and check that pool is degraded
read -r DISK1 _ <<<"$DISKS"
log_must zpool offline -tf $TESTPOOL $DISK1
log_must check_all $TESTPOOL "DEGRADED"
log_must zpool online $TESTPOOL $DISK1
log_must zpool clear $TESTPOOL

# Create a new pool out of a scsi_debug disk
TESTPOOL2=testpool2
MINVDEVSIZE_MB=$((MINVDEVSIZE / 1048576))
load_scsi_debug $MINVDEVSIZE_MB 1 1 1 '512b'

SDISK=$(get_debug_device)
host=$(get_scsi_host $SDISK)

# Use $REALDISK instead of $SDISK in our pool because $SDISK can change names
# as we remove/add the disk (i.e. /dev/sdf -> /dev/sdg).
REALDISK=/dev/kstat-state-realdisk
log_must [ ! -e $REALDISK ]
log_must ln "$DEV_RDSKDIR/$SDISK" "$REALDISK"

log_must zpool create $TESTPOOL2 $REALDISK

# Backup the contents of the disk image so cleanup can restore it.
BACKUP=$TEST_BASE_DIR/kstat-state-realdisk.gz
log_must [ ! -e $BACKUP ]
log_must eval "gzip -c '$REALDISK' > '$BACKUP'"

# Yank out the disk from under the pool
log_must rm "$REALDISK"
remove_disk "$SDISK"

# Run a 'zpool scrub' in the background to suspend the pool.  We run it in the
# background since the command will hang when the pool gets suspended.  The
# command will resume and exit after we restore the missing disk later on.
zpool scrub $TESTPOOL2 &

# After the scrub is triggered, zpool/zfs commands can block for up to 180
# seconds before the pool is marked suspended, and it can take roughly
# another 10 seconds for the reported status to update.  Sleep for 200
# seconds so we read the state only after it has settled.
sleep 200

log_must check_all $TESTPOOL2 "SUSPENDED"

log_pass "/proc/spl/kstat/zfs/<pool>/state test successful"
153