#!/bin/ksh -p

#
# CDDL HEADER START
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source.  A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
# CDDL HEADER END
#

#
# Copyright (c) 2019, Datto Inc. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/replacement/replacement.cfg

#
# DESCRIPTION:
# Testing resilver restart logic both with and without the deferred resilver
# feature enabled, verifying that resilver is not restarted when it is
# unnecessary.
#
# STRATEGY:
# 1. Create a pool
# 2. Create four filesystems with the primary cache disabled to force reads
# 3. Write four files simultaneously, one to each filesystem
# 4. Do with and without deferred resilvers enabled
#    a. Replace a vdev with a spare & suspend resilver immediately
#    b. Verify resilver starts properly
#    c. Offline / online another vdev to introduce a new DTL range
#    d. Verify resilver restart or defer
#    e. Inject read errors on vdev that was offlined / onlined
#    f. Verify that resilver did not restart
#    g. Unsuspend resilver and wait for it to finish
#    h. Verify that there are two resilvers and nothing is deferred
#

#
# Restore the tunables saved at the start of the test, clear any injected
# faults, then destroy the pool and remove its backing files.
#
function cleanup
{
	log_must set_tunable32 RESILVER_MIN_TIME_MS $ORIG_RESILVER_MIN_TIME
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
	    $ORIG_SCAN_SUSPEND_PROGRESS
	log_must set_tunable32 ZEVENT_LEN_MAX $ORIG_ZFS_ZEVENT_LEN_MAX
	log_must zinject -c all
	destroy_pool $TESTPOOL1
	rm -f ${VDEV_FILES[@]} $SPARE_VDEV_FILE
}

#
# Count resilver start events in zpool events and check which vdevs have a
# deferred resilver.
#
# $1 msg   - suffix appended to the log messages (may be empty)
# $2 cnt   - expected number of resilver_start events
# $3 defer - empty: skip the deferred-resilver check
#            "-":   no vdev may have the resilver defer flag
#            other: value that must match the child index reported for
#                   the vdev carrying the resilver defer flag
#
function verify_restarts # <msg> <cnt> <defer>
{
	typeset msg=$1
	typeset cnt=$2
	typeset defer=$3
	typeset resilvers vdev_defers

	# check the number of resilver start in events log
	resilvers=$(zpool events | grep -c sysevent.fs.zfs.resilver_start)
	log_note "expected $cnt resilver start(s)$msg, found $resilvers"
	[[ "$resilvers" -ne "$cnt" ]] &&
	    log_fail "expected $cnt resilver start(s)$msg, found $resilvers"

	[[ -z "$defer" ]] && return 0

	# use zdb to find which vdevs have the resilver defer flag; the awk
	# script remembers the digits of the most recent "children" line and
	# prints them whenever a resilver_defer line follows
	vdev_defers=$(zdb -C $TESTPOOL1 | awk '
	    /children/ { gsub(/[^0-9]/, ""); child = $0 }
	    /com\.datto:resilver_defer$/ { print child }
	')

	if [[ "$defer" == "-" ]]
	then
		[[ -n "$vdev_defers" ]] &&
		    log_fail "didn't expect any vdevs to have resilver deferred"
		return 0
	fi

	# Compare as strings: an arithmetic -eq would treat an empty result as
	# 0 and cannot evaluate a multi-line result (several deferred vdevs).
	[[ "$vdev_defers" == "$defer" ]] ||
	    log_fail "resilver deferred set on unexpected vdev: $vdev_defers"
}

log_assert "Check for unnecessary resilver restarts"

ORIG_RESILVER_MIN_TIME=$(get_tunable RESILVER_MIN_TIME_MS)
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)

# Expected values for the four verify_restarts checkpoints of each pass:
# RESTARTS / VDEVS are used without deferred resilvers, DEFER_RESTARTS /
# DEFER_VDEVS with them (see verify_restarts for the meaning of '' and '-').
set -A RESTARTS -- '1' '2' '2' '2'
set -A VDEVS -- '' '' '' ''
set -A DEFER_RESTARTS -- '1' '1' '1' '2'
set -A DEFER_VDEVS -- '-' '2' '2' '-'

# "<old vdev> <new vdev>" pair; expanded unquoted on purpose so that it
# splits into the two arguments of zpool replace
VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"

log_onexit cleanup

# ensure that enough events will be saved
log_must set_tunable32 ZEVENT_LEN_MAX 512

log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE

log_must zpool create -f -o feature@resilver_defer=disabled $TESTPOOL1 \
    raidz ${VDEV_FILES[@]}

# create 4 filesystems
for fs in fs{0..3}
do
	log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL1/$fs
done

# simultaneously write 16M to each of them
set -A DATAPATHS /$TESTPOOL1/fs{0..3}/dat.0
log_note "Writing data files"
write_pids=""
for path in "${DATAPATHS[@]}"
do
	dd if=/dev/urandom of=$path bs=1M count=16 > /dev/null 2>&1 &
	write_pids="$write_pids $!"
done

# wait on each writer individually; a bare 'wait' discards the exit status
# of the background jobs and would let a failed write go unnoticed
for pid in $write_pids
do
	wait $pid || log_fail "failed to write data file (dd pid $pid)"
done

# test without and with deferred resilver feature enabled
for test in "without" "with"
do
	log_note "Testing $test deferred resilvers"

	if [[ $test == "with" ]]
	then
		log_must zpool set feature@resilver_defer=enabled $TESTPOOL1
		RESTARTS=( "${DEFER_RESTARTS[@]}" )
		VDEVS=( "${DEFER_VDEVS[@]}" )
		# the first pass replaced VDEV_FILES[1] with the spare, so
		# this pass swaps them back
		VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}"
	fi

	# clear the events
	log_must zpool events -c

	# limit scanning time
	log_must set_tunable32 RESILVER_MIN_TIME_MS 50

	# initiate a resilver and suspend the scan as soon as possible
	log_must zpool replace $TESTPOOL1 $VDEV_REPLACE
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1

	# there should only be 1 resilver start
	verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}"

	# offline then online a vdev to introduce a new DTL range after current
	# scan, which should restart (or defer) the resilver
	log_must zpool offline $TESTPOOL1 ${VDEV_FILES[2]}
	sync_pool $TESTPOOL1
	log_must zpool online $TESTPOOL1 ${VDEV_FILES[2]}
	sync_pool $TESTPOOL1

	# there should now be 2 resilver starts w/o defer, 1 with defer
	verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}"

	# inject read io errors on vdev and verify resilver does not restart
	log_must zinject -a -d ${VDEV_FILES[2]} -e io -T read -f 0.25 $TESTPOOL1
	log_must cp ${DATAPATHS[1]} /dev/null
	log_must zinject -c all

	# there should still be 2 resilver starts w/o defer, 1 with defer
	verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}"

	# unsuspend resilver
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
	log_must set_tunable32 RESILVER_MIN_TIME_MS 3000

	# wait for resilver to finish
	log_must zpool wait -t resilver $TESTPOOL1
	log_must is_pool_resilvered $TESTPOOL1

	# wait for a few txg's to see if a resilver happens
	sync_pool $TESTPOOL1
	sync_pool $TESTPOOL1

	# there should now be 2 resilver starts
	verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}"
done

log_pass "Resilver did not restart unnecessarily"