#!/usr/local/bin/ksh93 -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2012,2013 Spectra Logic Corporation.  All rights reserved.
# Use is subject to license terms.
#
# $FreeBSD$

. $STF_SUITE/include/libtest.kshlib

################################################################################
#
# __stc_assertion_start
#
# ID: zfsd_fault_001_pos
#
# DESCRIPTION:
#	If a vdev experiences IO errors, it will become faulted.
#
#
# STRATEGY:
#	1. Create a storage pool.  Only use the da driver (FreeBSD's SCSI disk
#	   driver) because it has a special interface for simulating IO errors.
#	2. Inject IO errors while doing IO to the pool.
#	3. Verify that the vdev becomes FAULTED.
#	4. ONLINE it and verify that it resilvers and joins the pool.
#
# TESTABILITY: explicit
#
# TEST_AUTOMATION_LEVEL: automated
#
# CODING STATUS: COMPLETED (2012-08-09)
#
# __stc_assertion_end
#
###############################################################################

verify_runnable "global"

#
# Exit handler (registered with log_onexit below).
#
# Globals read: TMPDISKNUM (unit number of the da disk used for error
#               injection; may be empty), TESTPOOL, ZPOOL.
#
# Turns error injection back off and, if the test pool still exists,
# dumps its status for debugging and destroys it.
#
function cleanup
{
	# Disable error injection, if still active.  TMPDISKNUM is empty
	# when no da disk was found, in which case there is nothing to
	# disable and the sysctl OID would be malformed.
	if [[ -n "$TMPDISKNUM" ]]; then
		sysctl kern.cam.da.$TMPDISKNUM.error_inject=0 > /dev/null
	fi

	if poolexists $TESTPOOL; then
		# We should not get here if the test passed.  Print the output
		# of zpool status to assist in debugging.
		$ZPOOL status
		# Clear out artificially generated errors and destroy the pool
		$ZPOOL clear $TESTPOOL
		destroy_pool $TESTPOOL
	fi
}

log_assert "ZFS will fault a vdev that produces IO errors"

log_onexit cleanup
ensure_zfsd_running

# Make sure that at least one of the disks is using the da driver, and use
# that disk to inject errors.
typeset TMPDISK=""
typeset TMPDISKNUM=""
for d in $DISKS; do
	b=$(basename "$d")
	# Strip everything from the first digit onward; what is left is the
	# driver name (e.g. "da0" -> "da", "ada0" -> "ada").
	if [[ "${b%%[0-9]*}" == da ]]; then
		TMPDISK=$b
		TMPDISKNUM=${b##da}
		break
	fi
done
if [[ -z "$TMPDISK" ]]; then
	log_unsupported "This test requires at least one disk to use the da driver"
fi


for type in "raidz" "mirror"; do
	log_note "Testing raid type $type"

	# Create a pool on the supplied disks
	create_pool $TESTPOOL $type $DISKS
	log_must $ZFS create $TESTPOOL/$TESTFS

	# Cause some IO errors writing to the pool.
	# NOTE(review): this loop has no upper bound; if the pool never
	# becomes DEGRADED it only ends when the test harness gives up.
	while true; do
		# Running zpool status after every dd operation is too slow.
		# So we will run several dd's in a row before checking zpool
		# status.  sync between dd operations to ensure that the disk
		# gets IO
		for (( i = 0; i < 64; i++ )); do
			# Error injection is re-armed before every write.
			sysctl kern.cam.da.$TMPDISKNUM.error_inject=1 > \
			    /dev/null
			$DD if=/dev/zero bs=128k count=1 >> \
			    /$TESTPOOL/$TESTFS/$TESTFILE 2> /dev/null
			$FSYNC /$TESTPOOL/$TESTFS/$TESTFILE
		done
		# Check whether the pool has gone DEGRADED yet; that is the
		# pool-level state this test waits for before checking that
		# the vdev itself is FAULTED.
		if $ZPOOL status $TESTPOOL | grep -q 'state: DEGRADED'; then
			log_note "$TESTPOOL got degraded"
			break
		fi
	done

	log_must check_state $TESTPOOL $TMPDISK "FAULTED"

	# Find the name of the failed vdev as zpool status reports it: the
	# first column of the row whose name is $TMPDISK and whose state is
	# FAULTED.
	typeset FAILED_VDEV
	FAILED_VDEV=$($ZPOOL status $TESTPOOL |
	    awk -v disk="$TMPDISK" '$1 == disk && $2 == "FAULTED" {print $1}')

	# Reattach the failed disk
	if ! $ZPOOL online $TESTPOOL $FAILED_VDEV > /dev/null; then
		log_fail "Could not reattach $FAILED_VDEV"
	fi

	# Verify that the pool resilvers and goes to the ONLINE state.
	# Poll up to 60 times, 2 seconds apart; retries reaches 0 only if
	# every attempt failed.
	for (( retries = 60; retries > 0; retries-- )); do
		if $ZPOOL status $TESTPOOL | grep -Eq "scan:.*resilvered" &&
		    $ZPOOL status $TESTPOOL | grep -Eq "state:.*ONLINE"; then
			break
		fi
		$SLEEP 2
	done

	if (( retries == 0 )); then
		log_fail "$TESTPOOL never resilvered in the allowed time"
	fi

	destroy_pool $TESTPOOL
	# ${TESTPOOL:?} aborts instead of expanding to "/" if the pool name
	# is ever empty.
	log_must $RM -rf "/${TESTPOOL:?}"
done

log_pass