#!/bin/ksh -p
#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/events/events_common.kshlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION:
# Testing Fault Management Agent ZED Logic - Physically removed device is
# made unavailable and onlined when reattached
#
# STRATEGY:
# 1. Create a pool
# 2. Simulate physical removal of one device
# 3. Verify the device is unavailable
# 4. Reattach the device
# 5. Verify the device is onlined
# 6. Repeat the same tests with a spare device:
#    zed will use the spare to handle the removed data device
# 7. Repeat the same tests again with a faulted spare device:
#    the removed data device should be unavailable
#
# NOTE: the use of 'block_device_wait' throughout the test helps avoid race
#       conditions caused by mixing creation/removal events from partitioning
#       the disk (zpool create) and events from physically removing it
#       (remove_disk).
#
# NOTE: the test relies on 'zpool sync' to prompt the kmods to transition a
#       vdev to the unavailable state.  The ZED does receive a removal
#       notification but only relies on it to activate a hot spare.
#       Additional work is planned to extend an existing ioctl interface to
#       allow the ZED to transition the vdev in to a removed state.
#
verify_runnable "both"

if is_linux; then
	# Add one 512b scsi_debug device (4Kn would generate IO errors)
	# NOTE: must be larger than other "file" vdevs and minimum SPA devsize:
	# add 32m of fudge
	load_scsi_debug $(($MINVDEVSIZE/1024/1024+32)) 1 1 1 '512b'
else
	log_unsupported "scsi debug module unsupported"
fi

#
# Exit handler: destroy the test pool, delete the file-backed vdevs and
# unload the scsi_debug device loaded above.
#
function cleanup
{
	destroy_pool $TESTPOOL
	rm -f "$filedev1" "$filedev2" "$filedev3" "$sparedev"
	unload_scsi_debug
}

#
# Create $TESTPOOL from a vdev specification and record its mountpoint in
# the global 'mntpnt'.
#
# $1   - "spare" to also add $sparedev as a hot spare, anything else to skip
# $2.. - vdev specification words handed to 'zpool create'
#
function auto_offline_create_pool
{
	typeset spare_mode=$1
	shift

	log_must zpool create -f $TESTPOOL "$@"
	block_device_wait ${DEV_DSKDIR}/${removedev}
	if [[ $spare_mode == "spare" ]]; then
		log_must zpool add $TESTPOOL spare $sparedev
	fi

	mntpnt=$(get_prop mountpoint /$TESTPOOL) ||
	    log_fail "get_prop mountpoint /$TESTPOOL"
}

#
# Simulate physical removal of $removedev, then write a file and sync the
# pool: the header NOTE explains why the sync is needed for the vdev state
# change to be observed.
#
function auto_offline_remove_device
{
	remove_disk $removedev
	log_must mkfile 1m $mntpnt/file
	log_must zpool sync $TESTPOOL
}

#
# Reattach $removedev, verify it comes back ONLINE, then destroy the pool
# and relabel the device so the next configuration starts from a clean disk.
#
function auto_offline_reattach_device
{
	insert_disk $removedev
	log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"

	# cleanup
	destroy_pool $TESTPOOL
	log_must parted "${DEV_DSKDIR}/${removedev}" -s -- mklabel msdos
	block_device_wait ${DEV_DSKDIR}/${removedev}
}

log_assert "ZED detects physically removed devices"

log_onexit cleanup

filedev1="$TEST_BASE_DIR/file-vdev-1"
filedev2="$TEST_BASE_DIR/file-vdev-2"
filedev3="$TEST_BASE_DIR/file-vdev-3"
sparedev="$TEST_BASE_DIR/file-vdev-spare"
removedev=$(get_debug_device)

# Every layout places the removable scsi_debug device in a redundant vdev.
typeset poolconfs=(
    "mirror $filedev1 $removedev"
    "raidz3 $filedev1 $filedev2 $filedev3 $removedev"
    "mirror $filedev1 $filedev2 special mirror $filedev3 $removedev"
)

log_must truncate -s $MINVDEVSIZE $filedev1
log_must truncate -s $MINVDEVSIZE $filedev2
log_must truncate -s $MINVDEVSIZE $filedev3
log_must truncate -s $MINVDEVSIZE $sparedev

# $conf is intentionally left unquoted below: each configuration string has
# to be split into the individual words 'zpool create' expects.
for conf in "${poolconfs[@]}"
do
	# 1. Create a pool
	auto_offline_create_pool nospare $conf

	# 2. Simulate physical removal of one device
	auto_offline_remove_device

	# 3. Verify the device is unavailable.
	log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"

	# 4. Reattach the device
	# 5. Verify the device is onlined
	auto_offline_reattach_device
done

# 6. Repeat the same tests with a spare device: zed will use the spare to handle
#    the removed data device
for conf in "${poolconfs[@]}"
do
	# 1. Create a pool with a spare
	auto_offline_create_pool spare $conf

	# 2. Simulate physical removal of one device
	auto_offline_remove_device

	# 3. Verify the device is handled by the spare.
	log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
	log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"

	# 4. Reattach the device
	# 5. Verify the device is onlined
	auto_offline_reattach_device
done

# 7. Repeat the same tests again with a faulted spare device: the removed data
#    device should become unavailable, as the only spare cannot be used
for conf in "${poolconfs[@]}"
do
	# 1. Create a pool with a spare
	auto_offline_create_pool spare $conf

	# 2. Fault the spare device making it unavailable
	log_must zpool offline -f $TESTPOOL $sparedev
	log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"

	# 3. Simulate physical removal of one device
	auto_offline_remove_device

	# 4. Verify the device is unavailable
	log_must wait_vdev_state $TESTPOOL $removedev "UNAVAIL"

	# 5. Reattach the device
	# 6. Verify the device is onlined
	auto_offline_reattach_device
done

log_pass "ZED detects physically removed devices"