1#!/bin/ksh -p
2
3#
4# CDDL HEADER START
5#
6# This file and its contents are supplied under the terms of the
7# Common Development and Distribution License ("CDDL"), version 1.0.
8# You may only use this file in accordance with the terms of version
9# 1.0 of the CDDL.
10#
11# A full copy of the text of the CDDL should have accompanied this
12# source.  A copy of the CDDL is also available via the Internet at
13# http://www.illumos.org/license/CDDL.
14#
15# CDDL HEADER END
16#
17
18#
19# Copyright (c) 2019, Datto Inc. All rights reserved.
20#
21
22. $STF_SUITE/include/libtest.shlib
23. $STF_SUITE/tests/functional/replacement/replacement.cfg
24
25#
26# DESCRIPTION:
27# Testing resilver restart logic both with and without the deferred resilver
28# feature enabled, verifying that resilver is not restarted when it is
29# unnecessary.
30#
31# STRATEGY:
32# 1. Create a pool
# 2. Create four filesystems with the primary cache disabled to force reads
34# 3. Write four files simultaneously, one to each filesystem
35# 4. Do with and without deferred resilvers enabled
36#    a. Replace a vdev with a spare & suspend resilver immediately
37#    b. Verify resilver starts properly
38#    c. Offline / online another vdev to introduce a new DTL range
39#    d. Verify resilver restart or defer
#    e. Inject read errors on vdev that was offlined / onlined
41#    f. Verify that resilver did not restart
42#    g. Unsuspend resilver and wait for it to finish
43#    h. Verify that there are two resilvers and nothing is deferred
44#
45
#
# Undo everything this test changed: restore the three tunables saved in
# the ORIG_* variables, clear all zinject handlers, destroy the test pool
# and delete the backing vdev files.  Registered with log_onexit.
#
# Globals read: ORIG_RESILVER_MIN_TIME, ORIG_SCAN_SUSPEND_PROGRESS,
#               ORIG_ZFS_ZEVENT_LEN_MAX, TESTPOOL1, VDEV_FILES,
#               SPARE_VDEV_FILE
#
function cleanup
{
	log_must set_tunable32 RESILVER_MIN_TIME_MS "$ORIG_RESILVER_MIN_TIME"
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS \
	    "$ORIG_SCAN_SUSPEND_PROGRESS"
	log_must set_tunable32 ZEVENT_LEN_MAX "$ORIG_ZFS_ZEVENT_LEN_MAX"
	log_must zinject -c all
	destroy_pool "$TESTPOOL1"

	# Quote every path so a vdev file whose name contains whitespace is
	# removed as one operand; "--" keeps a leading '-' from being parsed
	# as an option.
	rm -f -- "${VDEV_FILES[@]}" "$SPARE_VDEV_FILE"
}
56
#
# Count resilver start events in the pool's event log and check which
# vdevs (if any) have a deferred resilver pending.
#
# Arguments:
#   $1 msg   - suffix appended to the log / failure message (may be empty)
#   $2 cnt   - expected number of sysevent.fs.zfs.resilver_start events
#   $3 defer - expectation for the com.datto:resilver_defer flag:
#                ''   skip the deferred check
#                '-'  no vdev may have the flag
#                N    the flag must be reported for child vdev N only
#
# Globals: reads TESTPOOL1; sets RESILVERS and VDEV_DEFERS.
# Returns 0 when every expectation holds, otherwise calls log_fail.
#
function verify_restarts # <msg> <cnt> <defer>
{
	# keep the arguments local so callers' variables are not clobbered
	typeset msg=$1
	typeset cnt=$2
	typeset defer=$3

	# check the number of resilver starts in the events log
	RESILVERS=$(zpool events | grep -c sysevent.fs.zfs.resilver_start)
	log_note "expected $cnt resilver start(s)$msg, found $RESILVERS"
	if [[ "$RESILVERS" -ne "$cnt" ]]
	then
		log_fail "expected $cnt resilver start(s)$msg, found $RESILVERS"
	fi

	# the caller gave no expectation about deferred vdevs
	[[ -z "$defer" ]] && return 0

	# Use zdb to find which vdevs have the resilver defer flag: awk keeps
	# only the digits of the most recent line containing "children" and
	# prints them whenever a line ends with the flag name.
	VDEV_DEFERS=$(zdb -C "$TESTPOOL1" | awk '
	    /children/ { gsub(/[^0-9]/, ""); child = $0 }
	    /com\.datto:resilver_defer$/ { print child }
	')

	if [[ "$defer" == "-" ]]
	then
		if [[ -n "$VDEV_DEFERS" ]]
		then
			log_fail "didn't expect any vdevs to have resilver deferred"
		fi
		return 0
	fi

	# Compare as strings rather than with -eq: an empty result would
	# otherwise evaluate to 0 and match an expected vdev 0, and a
	# multi-line result (several vdevs flagged) is not a valid number.
	if [[ "$VDEV_DEFERS" != "$defer" ]]
	then
		log_fail "resilver deferred set on unexpected vdev: $VDEV_DEFERS"
	fi

	return 0
}
88
log_assert "Check for unnecessary resilver restarts"

# remember the current tunable values so cleanup can restore them
ORIG_ZFS_ZEVENT_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX)
ORIG_SCAN_SUSPEND_PROGRESS=$(get_tunable SCAN_SUSPEND_PROGRESS)
ORIG_RESILVER_MIN_TIME=$(get_tunable RESILVER_MIN_TIME_MS)

# Expectations for the four verify_restarts checkpoints of a pass, in the
# order they are indexed (0..3).  RESTARTS/VDEVS are used as-is for the pass
# without deferred resilvers; the DEFER_* tables replace them for the pass
# with the feature enabled.  *RESTARTS is the expected resilver start count,
# *VDEVS the <defer> argument.
set -A DEFER_VDEVS -- "-" "2" "2" "-"
set -A DEFER_RESTARTS -- "1" "1" "1" "2"
set -A VDEVS -- "" "" "" ""
set -A RESTARTS -- "1" "2" "2" "2"

# vdev arguments handed to "zpool replace" in the first pass
VDEV_REPLACE="${VDEV_FILES[1]} $SPARE_VDEV_FILE"

log_onexit cleanup

# raise the event queue limit so that enough events will be saved
log_must set_tunable32 ZEVENT_LEN_MAX 512

# create the backing files for the pool vdevs and the spare
log_must truncate -s $VDEV_FILE_SIZE ${VDEV_FILES[@]} $SPARE_VDEV_FILE

# the pool starts out with the resilver_defer feature disabled
log_must zpool create -f -o feature@resilver_defer=disabled \
    $TESTPOOL1 raidz ${VDEV_FILES[@]}
111
# create 4 filesystems; primarycache=none keeps their data out of the
# primary cache so that later reads are forced
for fs in fs{0..3}
do
	log_must zfs create -o primarycache=none -o recordsize=1k $TESTPOOL1/$fs
done

# simultaneously write 16M to each of them
set -A DATAPATHS /$TESTPOOL1/fs{0..3}/dat.0
log_note "Writing data files"
dd_pids=""
for path in ${DATAPATHS[@]}
do
	dd if=/dev/urandom of=$path bs=1M count=16 > /dev/null 2>&1 &
	dd_pids="$dd_pids $!"
done

# A bare "wait" discards the writers' exit statuses and their output is
# already sent to /dev/null, so reap each writer by pid and fail the test
# if any of them did not complete successfully.
for pid in $dd_pids
do
	wait $pid || log_fail "dd (pid $pid) failed to write its data file"
done
126
# run the scenario twice: first without, then with the deferred resilver
# feature enabled
for mode in "without" "with"
do
	log_note "Testing $mode deferred resilvers"

	case $mode in
	with)
		# second pass: enable the feature, switch to the DEFER_*
		# expectations and reverse the replace arguments
		log_must zpool set feature@resilver_defer=enabled $TESTPOOL1
		RESTARTS=( "${DEFER_RESTARTS[@]}" )
		VDEVS=( "${DEFER_VDEVS[@]}" )
		VDEV_REPLACE="$SPARE_VDEV_FILE ${VDEV_FILES[1]}"
		;;
	esac

	# start each pass from an empty event log
	log_must zpool events -c

	# limit scanning time
	log_must set_tunable32 RESILVER_MIN_TIME_MS 50

	# kick off a resilver, then suspend the scan as soon as possible
	log_must zpool replace $TESTPOOL1 $VDEV_REPLACE
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 1

	# checkpoint 0: there should only be 1 resilver start
	verify_restarts '' "${RESTARTS[0]}" "${VDEVS[0]}"

	# cycle another vdev offline and back online to introduce a new DTL
	# range after the current scan, which should restart (or defer) the
	# resilver
	cycled_vdev=${VDEV_FILES[2]}
	log_must zpool offline $TESTPOOL1 $cycled_vdev
	sync_pool $TESTPOOL1
	log_must zpool online $TESTPOOL1 $cycled_vdev
	sync_pool $TESTPOOL1

	# checkpoint 1: 2 resilver starts w/o defer, 1 with defer
	verify_restarts ' after offline/online' "${RESTARTS[1]}" "${VDEVS[1]}"

	# inject read io errors on that vdev while reading a data file, then
	# verify the resilver does not restart
	log_must zinject -a -d $cycled_vdev -e io -T read -f 0.25 $TESTPOOL1
	log_must cp ${DATAPATHS[1]} /dev/null
	log_must zinject -c all

	# checkpoint 2: still 2 resilver starts w/o defer, 1 with defer
	verify_restarts ' after zinject' "${RESTARTS[2]}" "${VDEVS[2]}"

	# unsuspend resilver
	log_must set_tunable32 SCAN_SUSPEND_PROGRESS 0
	log_must set_tunable32 RESILVER_MIN_TIME_MS 3000

	# wait for resilver to finish
	log_must zpool wait -t resilver $TESTPOOL1
	log_must is_pool_resilvered $TESTPOOL1

	# wait for a few txg's to see if a resilver happens
	sync_pool $TESTPOOL1
	sync_pool $TESTPOOL1

	# checkpoint 3: there should now be 2 resilver starts
	verify_restarts ' after resilver' "${RESTARTS[3]}" "${VDEVS[3]}"
done

log_pass "Resilver did not restart unnecessarily"
188