1#!/bin/ksh -p
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or https://opensource.org/licenses/CDDL-1.0.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22
23#
24# Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
25#
26
27. $STF_SUITE/include/libtest.shlib
28. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
29
30#
31# DESCRIPTION:
32#	When sequentially resilvering a dRAID pool to a distributed spare
33#	silent damage to an online vdev in a replacing or spare mirror vdev
34#	is not expected to be repaired.  Not only does the rebuild have no
35#	reason to suspect the silent damage but even if it did there's no
36#	checksum available to determine the correct copy and make the repair.
37#	However, the subsequent scrub should detect and repair any damage.
38#
39# STRATEGY:
40#	1. Create block device files for the test draid pool
41#	2. For each parity value [1..3]
42#		a. Create a draid pool
43#		b. Fill it with some directories/files
44#		c. Systematically damage and replace three devices by:
45#			- Overwrite the device
46#			- Replace the damaged vdev with a distributed spare
47#			- Scrub the pool and verify repair IO is issued
48#		d. Detach the distributed spares
49#		e. Scrub the pool and verify there was nothing to repair
50#		f. Destroy the draid pool
51#
52
53typeset -r devs=7
54typeset -r dev_size_mb=512
55typeset -a disks
56
57prefetch_disable=$(get_tunable PREFETCH_DISABLE)
58rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED)
59
function cleanup
{
	# Tear down the pool if it survived the test, remove every
	# file-backed device, and restore the tunables saved at startup.
	if poolexists "$TESTPOOL"; then
		destroy_pool "$TESTPOOL"
	fi

	typeset -i i
	for (( i = 0; i <= devs; i++ )); do
		rm -f "$TEST_BASE_DIR/dev-$i"
	done

	set_tunable32 PREFETCH_DISABLE $prefetch_disable
	set_tunable32 REBUILD_SCRUB_ENABLED $rebuild_scrub_enabled
}
71
log_onexit cleanup

# Disable prefetch so scrub repair IO is attributable to the damaged
# blocks, and disable the post-rebuild scrub so the test controls when
# the scrub runs.
log_must set_tunable32 PREFETCH_DISABLE 1
log_must set_tunable32 REBUILD_SCRUB_ENABLED 0

# Disk files which will be used by pool
for i in {0..$(($devs - 1))}; do
	device=$TEST_BASE_DIR/dev-$i
	log_must truncate -s ${dev_size_mb}M $device
	disks[${#disks[*]}+1]=$device
done

# Disk file which will be attached.  Use the same ${dev_size_mb}M size
# as the pool devices rather than a hard-coded value so a change to
# dev_size_mb keeps all devices consistent.
log_must truncate -s ${dev_size_mb}M $TEST_BASE_DIR/dev-$devs

dir=$TEST_BASE_DIR
88
# Repeat the test for each supported dRAID parity level; the ':3s'
# suffix requests three distributed spares, one per device damaged below.
for nparity in 1 2 3; do
	raid=draid${nparity}:3s

	log_must zpool create -f -O compression=off -o cachefile=none \
	    $TESTPOOL $raid ${disks[@]}

	# Populate the pool with a mix of recordsizes and compression
	# settings so the damage hits a variety of block layouts.
	log_must zfs create $TESTPOOL/fs
	log_must fill_fs /$TESTPOOL/fs 1 256 10 1024 R

	log_must zfs create -o compress=on $TESTPOOL/fs2
	log_must fill_fs /$TESTPOOL/fs2 1 256 10 1024 R

	log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
	log_must fill_fs /$TESTPOOL/fs3 1 256 10 1024 R

	# Export/import to drop any cached data before checking status.
	log_must zpool export $TESTPOOL
	log_must zpool import -o cachefile=none -d $dir $TESTPOOL

	log_must check_pool_status $TESTPOOL "errors" "No known data errors"

	for nspare in 0 1 2; do
		damaged=$dir/dev-${nspare}
		spare=draid${nparity}-0-${nspare}

		# Silently damage the device while the pool is exported,
		# skipping the first 4M so the vdev labels survive and the
		# pool can still be imported.
		log_must zpool export $TESTPOOL
		log_must dd conv=notrunc if=/dev/zero of=$damaged \
		    bs=1M seek=4 count=$(($dev_size_mb-4))
		log_must zpool import -o cachefile=none -d $dir $TESTPOOL

		# Sequentially resilver (-s) to the distributed spare and
		# wait (-w); the rebuild is not expected to repair the
		# silent damage on the still-online original vdev.
		log_must zpool replace -fsw $TESTPOOL $damaged $spare

		# Scrub the pool after the sequential resilver and verify
		# that the silent damage was repaired by the scrub.
		log_must zpool scrub -w $TESTPOOL
		log_must zpool status $TESTPOOL
		log_must check_pool_status $TESTPOOL "errors" \
		    "No known data errors"
		log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
		log_mustnot check_pool_status $TESTPOOL "scan" "repaired 0B"
	done

	# Each replacing mirror (original device plus distributed spare)
	# must be fully ONLINE before the spares are detached.
	for nspare in 0 1 2; do
		log_must check_vdev_state $TESTPOOL \
		    spare-${nspare} "ONLINE"
		log_must check_vdev_state $TESTPOOL \
		    ${dir}/dev-${nspare} "ONLINE"
		log_must check_vdev_state $TESTPOOL \
		    draid${nparity}-0-${nspare} "ONLINE"
	done

	# Detach the distributed spares and scrub the pool again to
	# verify no damage remained on the originally corrupted vdevs.
	for nspare in 0 1 2; do
		log_must zpool detach $TESTPOOL draid${nparity}-0-${nspare}
	done

	log_must zpool clear $TESTPOOL
	log_must zpool scrub -w $TESTPOOL
	log_must zpool status $TESTPOOL

	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"

	log_must zpool destroy "$TESTPOOL"
done

log_pass "draid damaged device scrub test succeeded."
158