#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2022 by Lawrence Livermore National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib

#
# DESCRIPTION:
#	When sequentially resilvering a dRAID pool to a distributed spare
#	silent damage to an online vdev in a replacing or spare mirror vdev
#	is not expected to be repaired.  Not only does the rebuild have no
#	reason to suspect the silent damage but even if it did there's no
#	checksum available to determine the correct copy and make the repair.
#	However, the subsequent scrub should detect and repair any damage.
#
# STRATEGY:
#	1. Create block device files for the test draid pool
#	2. For each parity value [1..3]
#	   a. Create a draid pool
#	   b. Fill it with some directories/files
#	   c. Systematically damage and replace three devices by:
#	      - Overwrite the device
#	      - Replace the damaged vdev with a distributed spare
#	      - Scrub the pool and verify repair IO is issued
#	   d. Detach the distributed spares
#	   e. Scrub the pool and verify there was nothing to repair
#	   f. Destroy the draid pool
#

# Number of data devices in the pool; dev-$devs is the extra attach target.
typeset -r devs=7
typeset -r dev_size_mb=512
typeset -a disks

# Save current tunable values so cleanup() can restore them on exit.
prefetch_disable=$(get_tunable PREFETCH_DISABLE)
rebuild_scrub_enabled=$(get_tunable REBUILD_SCRUB_ENABLED)

# Destroy the pool, remove all backing files, and restore the tunables.
function cleanup
{
	poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"

	for i in {0..$devs}; do
		rm -f "$TEST_BASE_DIR/dev-$i"
	done

	set_tunable32 PREFETCH_DISABLE $prefetch_disable
	set_tunable32 REBUILD_SCRUB_ENABLED $rebuild_scrub_enabled
}

log_onexit cleanup

# Disable prefetch and the automatic post-rebuild scrub so the explicit
# scrubs below are the only source of repair IO the test observes.
log_must set_tunable32 PREFETCH_DISABLE 1
log_must set_tunable32 REBUILD_SCRUB_ENABLED 0

# Disk files which will be used by pool
for i in {0..$(($devs - 1))}; do
	device=$TEST_BASE_DIR/dev-$i
	log_must truncate -s ${dev_size_mb}M $device
	# Note: indices start at 1, but ${disks[@]} still expands them all.
	disks[${#disks[*]}+1]=$device
done

# Disk file which will be attached; sized with the same constant as the
# pool devices so the dd damage size below always matches.
log_must truncate -s ${dev_size_mb}M $TEST_BASE_DIR/dev-$devs

dir=$TEST_BASE_DIR

for nparity in 1 2 3; do
	raid=draid${nparity}:3s

	log_must zpool create -f -O compression=off -o cachefile=none \
	    $TESTPOOL $raid ${disks[@]}
	# log_must zfs set primarycache=metadata $TESTPOOL

	log_must zfs create $TESTPOOL/fs
	log_must fill_fs /$TESTPOOL/fs 1 256 10 1024 R

	log_must zfs create -o compress=on $TESTPOOL/fs2
	log_must fill_fs /$TESTPOOL/fs2 1 256 10 1024 R

	log_must zfs create -o compress=on -o recordsize=8k $TESTPOOL/fs3
	log_must fill_fs /$TESTPOOL/fs3 1 256 10 1024 R

	# Export/import cycle to flush caches before checking pool health.
	log_must zpool export $TESTPOOL
	log_must zpool import -o cachefile=none -d $dir $TESTPOOL

	log_must check_pool_status $TESTPOOL "errors" "No known data errors"

	for nspare in 0 1 2; do
		damaged=$dir/dev-${nspare}
		spare=draid${nparity}-0-${nspare}

		# Silently damage the device while the pool is exported,
		# skipping the first 4M to preserve the vdev labels.
		log_must zpool export $TESTPOOL
		log_must dd conv=notrunc if=/dev/zero of=$damaged \
		    bs=1M seek=4 count=$(($dev_size_mb-4))
		log_must zpool import -o cachefile=none -d $dir $TESTPOOL

		# Sequential (-s) resilver to the distributed spare; -w
		# waits for completion.
		log_must zpool replace -fsw $TESTPOOL $damaged $spare

		# Scrub the pool after the sequential resilver and verify
		# that the silent damage was repaired by the scrub.
		log_must zpool scrub -w $TESTPOOL
		log_must zpool status $TESTPOOL
		log_must check_pool_status $TESTPOOL "errors" \
		    "No known data errors"
		log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
		# Repair IO must have been issued, so "repaired 0B" would
		# indicate the scrub failed to fix the damage.
		log_mustnot check_pool_status $TESTPOOL "scan" "repaired 0B"
	done

	# All three spare mirrors and their members should be healthy.
	for nspare in 0 1 2; do
		log_must check_vdev_state $TESTPOOL \
		    spare-${nspare} "ONLINE"
		log_must check_vdev_state $TESTPOOL \
		    ${dir}/dev-${nspare} "ONLINE"
		log_must check_vdev_state $TESTPOOL \
		    draid${nparity}-0-${nspare} "ONLINE"
	done

	# Detach the distributed spares and scrub the pool again to
	# verify no damage remained on the originally corrupted vdevs.
	for nspare in 0 1 2; do
		log_must zpool detach $TESTPOOL draid${nparity}-0-${nspare}
	done

	log_must zpool clear $TESTPOOL
	log_must zpool scrub -w $TESTPOOL
	log_must zpool status $TESTPOOL

	log_must check_pool_status $TESTPOOL "errors" "No known data errors"
	log_must check_pool_status $TESTPOOL "scan" "with 0 errors"
	log_must check_pool_status $TESTPOOL "scan" "repaired 0B"

	log_must zpool destroy "$TESTPOOL"
done

log_pass "draid damaged device scrub test succeeded."