#!/bin/ksh -p

#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib

#
# DESCRIPTION:
#	Verify dRAID resilver to traditional and distributed spares for
#	a variety of pool configurations and pool states.
#
# STRATEGY:
#	1. For resilvers:
#	   a. Create a semi-random dRAID pool configuration which can
#	      sustain 1 failure and has 5 distributed spares.
#	   b. Fill the pool with data
#	   c. Systematically fault and replace vdevs in the pools with
#	      spares to test resilvering in common pool states.
#	   d. Scrub the pool to verify no data was lost
#	   e. Verify the contents of files in the pool
#

log_assert "Verify dRAID resilver"

#
# Exit handler: re-enable the post-rebuild scrub tunable that this test
# turns off, then run the shared cleanup routine.
#
function cleanup_tunable
{
	log_must set_tunable32 REBUILD_SCRUB_ENABLED 1
	cleanup
}

#
# Verify the pool after a replacement and require that the most recent
# scan reported nothing repaired and no errors.
#
# $1 - pool name
#
function verify_resilver_clean
{
	typeset pool=$1

	log_must verify_pool $pool
	log_must check_pool_status $pool "scan" "repaired 0B"
	log_must check_pool_status $pool "scan" "with 0 errors"
}

log_onexit cleanup_tunable

if is_kmemleak; then
	log_unsupported "Test case runs slowly when kmemleak is enabled"
fi

#
# Disable scrubbing after a sequential resilver to verify the resilver
# alone is able to reconstruct the data without the help of a scrub.
#
log_must set_tunable32 REBUILD_SCRUB_ENABLED 0

for replace_mode in "healing" "sequential"; do

	# "zpool replace -s" requests a sequential resilver; with no flag
	# a healing resilver is performed.  $flags is deliberately left
	# unquoted below so that an empty value adds no argument.
	if [[ "$replace_mode" = "sequential" ]]; then
		flags="-s"
	else
		flags=""
	fi

	parity=1
	spares=5
	data=$(random_int_between 1 4)
	children=10
	draid="draid${parity}:${data}d:${children}c:${spares}s"

	setup_test_env $TESTPOOL $draid $children

	#
	# Perform a variety of replacements to normal and distributed spares
	# for a variety of different vdev configurations to exercise different
	# resilver code paths.  The final configuration is expected to be
	# (example shown for a data width of 1; the width is chosen at random):
	#
	# NAME                                  STATE     READ WRITE CKSUM
	# testpool                              DEGRADED     0     0     0
	#   draid1:1d:10c:5s-0                  DEGRADED     0     0     0
	#     /var/tmp/basedir.28683/new_vdev0  ONLINE       0     0     0
	#     /var/tmp/basedir.28683/new_vdev1  ONLINE       0     0     0
	#     spare-2                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev2    FAULTED      0     0     0
	#       draid1-0-3                      ONLINE       0     0     0
	#     spare-3                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev3    FAULTED      0     0     0
	#       draid1-0-4                      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev4      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev5      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev6      ONLINE       0     0     0
	#     draid1-0-0                        ONLINE       0     0     0
	#     spare-8                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev8    FAULTED      0     0     0
	#       draid1-0-1                      ONLINE       0     0     0
	#     spare-9                           ONLINE       0     0     0
	#       /var/tmp/basedir.28683/vdev9    ONLINE       0     0     0
	#       draid1-0-2                      ONLINE       0     0     0
	# spares
	#   draid1-0-0                          INUSE     currently in use
	#   draid1-0-1                          INUSE     currently in use
	#   draid1-0-2                          INUSE     currently in use
	#   draid1-0-3                          INUSE     currently in use
	#   draid1-0-4                          INUSE     currently in use
	#

	# Distributed spare which replaces original online device
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev7 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev7 draid1-0-0
	log_must zpool detach $TESTPOOL $BASEDIR/vdev7
	log_must check_vdev_state $TESTPOOL draid1-0-0 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-0 "INUSE"
	verify_resilver_clean $TESTPOOL

	# Distributed spare in mirror with original device faulted
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev8
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev8 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev8 draid1-0-1
	log_must check_vdev_state $TESTPOOL spare-8 "DEGRADED"
	log_must check_vdev_state $TESTPOOL draid1-0-1 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-1 "INUSE"
	verify_resilver_clean $TESTPOOL

	# Distributed spare in mirror with original device still online
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev9 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev9 draid1-0-2
	log_must check_vdev_state $TESTPOOL spare-9 "ONLINE"
	log_must check_vdev_state $TESTPOOL draid1-0-2 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-2 "INUSE"
	verify_resilver_clean $TESTPOOL

	# Normal faulted device replacement
	new_vdev0="$BASEDIR/new_vdev0"
	log_must truncate -s $MINVDEVSIZE $new_vdev0
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev0
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev0 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev0 $new_vdev0
	log_must check_vdev_state $TESTPOOL $new_vdev0 "ONLINE"
	verify_resilver_clean $TESTPOOL

	# Distributed spare faulted device replacement
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev2
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev2 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev2 draid1-0-3
	log_must check_vdev_state $TESTPOOL spare-2 "DEGRADED"
	log_must check_vdev_state $TESTPOOL draid1-0-3 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-3 "INUSE"
	verify_resilver_clean $TESTPOOL

	# Normal online device replacement
	new_vdev1="$BASEDIR/new_vdev1"
	log_must truncate -s $MINVDEVSIZE $new_vdev1
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev1 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev1 $new_vdev1
	log_must check_vdev_state $TESTPOOL $new_vdev1 "ONLINE"
	verify_resilver_clean $TESTPOOL

	# Distributed spare online device replacement (then fault)
	# Confirm the precondition first, as the other online-device
	# cases above do.
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev3 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev3 draid1-0-4
	log_must check_vdev_state $TESTPOOL spare-3 "ONLINE"
	log_must check_vdev_state $TESTPOOL draid1-0-4 "ONLINE"
	log_must check_hotspare_state $TESTPOOL draid1-0-4 "INUSE"
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev3
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev3 "FAULTED"
	log_must check_vdev_state $TESTPOOL spare-3 "DEGRADED"
	verify_resilver_clean $TESTPOOL

	# Verify the original data is valid
	log_must is_data_valid $TESTPOOL
	log_must check_pool_status $TESTPOOL "errors" "No known data errors"

	cleanup
done

log_pass "Verify resilver to dRAID distributed spares"