1#!/usr/local/bin/ksh93 -p
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22
23#
24# Copyright (c) 2012,2013 Spectra Logic Corporation.  All rights reserved.
25# Use is subject to license terms.
26#
27# $FreeBSD$
28
29. $STF_SUITE/include/libtest.kshlib
30
31################################################################################
32#
33# __stc_assertion_start
34#
35# ID: zfsd_fault_001_pos
36#
37# DESCRIPTION:
38#   If a vdev experiences IO errors, it will become faulted.
39#
40#
41# STRATEGY:
42#   1. Create a storage pool.  Only use the da driver (FreeBSD's SCSI disk
43#      driver) because it has a special interface for simulating IO errors.
44#   2. Inject IO errors while doing IO to the pool.
45#   3. Verify that the vdev becomes FAULTED.
46#   4. ONLINE it and verify that it resilvers and joins the pool.
47#
48# TESTABILITY: explicit
49#
50# TEST_AUTOMATION_LEVEL: automated
51#
52# CODING STATUS: COMPLETED (2012-08-09)
53#
54# __stc_assertion_end
55#
56###############################################################################
57
58verify_runnable "global"
59
#
# Exit handler for this test.
#
# Globals read:
#   TMPDISKNUM  unit number of the da disk used for error injection
#               (may be unset or empty)
#   TESTPOOL    name of the pool under test
#   ZPOOL       zpool command
#
# Turns error injection off on the selected da disk and, if the test pool
# still exists, prints its status, clears its errors and destroys it.
#
function cleanup
{
	# Disable error injection, if still active.  This handler is
	# registered before a da disk has been selected, so TMPDISKNUM may
	# be empty here; skip the sysctl in that case rather than passing it
	# the malformed OID "kern.cam.da..error_inject".
	if [[ -n "$TMPDISKNUM" ]]; then
		sysctl kern.cam.da.$TMPDISKNUM.error_inject=0 > /dev/null
	fi

	if poolexists "$TESTPOOL"; then
		# We should not get here if the test passed.  Print the output
		# of zpool status to assist in debugging.
		$ZPOOL status
		# Clear out artificially generated errors and destroy the pool
		$ZPOOL clear "$TESTPOOL"
		destroy_pool "$TESTPOOL"
	fi
}
74
log_assert "ZFS will fault a vdev that produces IO errors"

log_onexit cleanup
ensure_zfsd_running

#
# Pick the first entry of $DISKS whose device name is "da<unit>...".
#
# Globals read:
#   DISKS       whitespace-separated list of disks (names or paths)
# Globals written:
#   TMPDISK     device name of the chosen disk with any directory part
#               removed (e.g. "da3"); empty if none was found
#   TMPDISKNUM  numeric unit of that disk (e.g. "3"); empty if none was found
#
# Returns 0 if a da disk was found, 1 otherwise.
#
function find_da_disk
{
	typeset d b unit

	TMPDISK=""
	TMPDISKNUM=""
	# $DISKS is intentionally unquoted: it is a whitespace-separated list.
	for d in $DISKS; do
		b=${d##*/}
		# Require at least one digit after "da" so that TMPDISKNUM can
		# never end up empty for a selected disk.
		if [[ $b == da[0-9]* ]]; then
			unit=${b#da}
			TMPDISK=$b
			# Keep only the leading digits, so a name with a
			# trailing suffix (e.g. "da0p1") still yields the
			# unit number used in the kern.cam.da.<unit> sysctl.
			TMPDISKNUM=${unit%%[!0-9]*}
			return 0
		fi
	done
	return 1
}

# Make sure that at least one of the disks is using the da driver, and use
# that disk to inject errors
typeset TMPDISK=""
typeset TMPDISKNUM=""
if ! find_da_disk; then
	log_unsupported "This test requires at least one disk to use the da driver"
fi
97
98
# Run the whole scenario once per redundant pool layout: inject IO errors on
# $TMPDISK until the pool degrades, check that the disk is FAULTED, bring it
# back online and wait for the pool to resilver and return to ONLINE.
for type in "raidz" "mirror"; do
	log_note "Testing raid type $type"

	# Create a pool on the supplied disks
	create_pool $TESTPOOL $type $DISKS
	log_must $ZFS create $TESTPOOL/$TESTFS

	# Cause some IO errors writing to the pool
	while true; do
		# Running zpool status after every dd operation is too slow.
		# So we will run several dd's in a row before checking zpool
		# status.  sync between dd operations to ensure that the disk
		# gets IO
		for ((i = 0; i < 64; i++)); do
			# Error injection is switched on again before every
			# single write.
			sysctl kern.cam.da.$TMPDISKNUM.error_inject=1 > \
				/dev/null
			$DD if=/dev/zero bs=128k count=1 >> \
				/$TESTPOOL/$TESTFS/$TESTFILE 2> /dev/null
			$FSYNC /$TESTPOOL/$TESTFS/$TESTFILE
		done
		# Check to see if the pool is degraded yet
		if $ZPOOL status $TESTPOOL | grep -q 'state: DEGRADED'; then
			log_note "$TESTPOOL got degraded"
			break
		fi
	done

	log_must check_state $TESTPOOL $TMPDISK "FAULTED"

	# Look up the faulted vdev as it is named in the first column of
	# zpool status.  The disk name is handed to awk as a variable and
	# compared as a plain string instead of being spliced into a regex.
	typeset FAILED_VDEV=$($ZPOOL status $TESTPOOL |
		awk -v disk="$TMPDISK" \
		    '$1 == disk && $2 == "FAULTED" {print $1}')

	# Reattach the failed disk
	if ! $ZPOOL online $TESTPOOL $FAILED_VDEV > /dev/null; then
		log_fail "Could not reattach $FAILED_VDEV"
	fi

	# Verify that the pool resilvers and goes to the ONLINE state.
	# Poll up to 60 times, 2 seconds apart; the counter must count DOWN
	# so that it reaches 0 when the allowed time is exhausted.
	for ((retries = 60; retries > 0; retries--)); do
		$ZPOOL status $TESTPOOL | grep -E -q "scan:.*resilvered"
		RESILVERED=$?
		$ZPOOL status $TESTPOOL | grep -E -q "state:.*ONLINE"
		ONLINE=$?
		# Both greps must have matched.  Compare the exit statuses
		# numerically: "test $a -a $b" would merely check that the
		# two strings are non-empty, which is always the case.
		if ((RESILVERED == 0 && ONLINE == 0)); then
			break
		fi
		$SLEEP 2
	done

	if ((retries == 0)); then
		log_fail "$TESTPOOL never resilvered in the allowed time"
	fi

	destroy_pool $TESTPOOL
	log_must $RM -rf /$TESTPOOL
done

log_pass
164