1#!/bin/ksh -p
2
3#
4# CDDL HEADER START
5#
6# This file and its contents are supplied under the terms of the
7# Common Development and Distribution License ("CDDL"), version 1.0.
8# You may only use this file in accordance with the terms of version
9# 1.0 of the CDDL.
10#
11# A full copy of the text of the CDDL should have accompanied this
12# source.  A copy of the CDDL is also available via the Internet at
13# http://www.illumos.org/license/CDDL.
14#
15# CDDL HEADER END
16#
17
18#
19# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
20#
21
22. $STF_SUITE/include/libtest.shlib
23. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
24
25#
26# DESCRIPTION:
27# Verify dRAID resilver to traditional and distributed spares for
28# a variety of pool configurations and pool states.
29#
30# STRATEGY:
31# 1. For resilvers:
32#    a. Create a semi-random dRAID pool configuration which can
33#       sustain 1 failure and has 5 distributed spares.
34#    b. Fill the pool with data
35#    c. Systematically fault and replace vdevs in the pools with
#       spares to test resilvering in common pool states.
37#    d. Scrub the pool to verify no data was lost
38#    e. Verify the contents of files in the pool
39#
40
41log_assert "Verify dRAID resilver"
42
43function cleanup_tunable
44{
45	log_must set_tunable32 REBUILD_SCRUB_ENABLED 1
46	cleanup
47}
48
49log_onexit cleanup_tunable
50
51#
52# Disable scrubbing after a sequential resilver to verify the resilver
53# alone is able to reconstruct the data without the help of a scrub.
54#
55log_must set_tunable32 REBUILD_SCRUB_ENABLED 0
56
#
# Run the checks that follow every replacement scenario below:
# verify_pool must succeed, and check_pool_status must accept both
# "repaired 0B" and "with 0 errors" for the pool's "scan" status.
#
# $1 - name of the pool to check
#
function verify_resilver_result
{
	typeset pool=$1

	log_must verify_pool $pool
	log_must check_pool_status $pool "scan" "repaired 0B"
	log_must check_pool_status $pool "scan" "with 0 errors"
}

#
# Run the full replacement sequence twice: once with no extra flag
# ("healing") and once passing -s to zpool replace ("sequential").
#
for replace_mode in "healing" "sequential"; do

	# $flags is deliberately expanded unquoted further down so that the
	# empty value used for the healing pass adds no argument at all.
	flags=""
	if [[ "$replace_mode" = "sequential" ]]; then
		flags="-s"
	fi

	parity=1
	spares=5
	data=$(random_int_between 1 4)
	children=10
	draid="draid${parity}:${data}d:${children}c:${spares}s"

	#
	# Distributed spares are named draid<parity>-<vdev>-<spare>. Derive
	# the common prefix from $parity so the names below stay correct if
	# the parity level above is ever changed.
	# NOTE(review): the middle "0" is assumed to be the index of the
	# single top-level dRAID vdev in the pool -- confirm.
	#
	dspare="draid${parity}-0"

	setup_test_env $TESTPOOL $draid $children

	#
	# Perform a variety of replacements to normal and distributed spares
	# for a variety of different vdev configurations to exercise different
	# resilver code paths. The final configuration is expected to be
	# (shown here for data=1):
	#
	# NAME                                  STATE     READ WRITE CKSUM
	# testpool                              DEGRADED     0     0     0
	#   draid1:1d:10c:5s-0                  DEGRADED     0     0     0
	#     /var/tmp/basedir.28683/new_vdev0  ONLINE       0     0     0
	#     /var/tmp/basedir.28683/new_vdev1  ONLINE       0     0     0
	#     spare-2                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev2    FAULTED      0     0     0
	#       draid1-0-3                      ONLINE       0     0     0
	#     spare-3                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev3    FAULTED      0     0     0
	#       draid1-0-4                      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev4      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev5      ONLINE       0     0     0
	#     /var/tmp/basedir.28683/vdev6      ONLINE       0     0     0
	#     draid1-0-0                        ONLINE       0     0     0
	#     spare-8                           DEGRADED     0     0     0
	#       /var/tmp/basedir.28683/vdev8    FAULTED      0     0     0
	#       draid1-0-1                      ONLINE       0     0     0
	#     spare-9                           ONLINE       0     0     0
	#       /var/tmp/basedir.28683/vdev9    ONLINE       0     0     0
	#       draid1-0-2                      ONLINE       0     0     0
	# spares
	#   draid1-0-0                          INUSE     currently in use
	#   draid1-0-1                          INUSE     currently in use
	#   draid1-0-2                          INUSE     currently in use
	#   draid1-0-3                          INUSE     currently in use
	#   draid1-0-4                          INUSE     currently in use
	#

	# Distributed spare which replaces original online device
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev7 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev7 ${dspare}-0
	log_must zpool detach $TESTPOOL $BASEDIR/vdev7
	log_must check_vdev_state $TESTPOOL ${dspare}-0 "ONLINE"
	log_must check_hotspare_state $TESTPOOL ${dspare}-0 "INUSE"
	verify_resilver_result $TESTPOOL

	# Distributed spare in mirror with original device faulted
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev8
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev8 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev8 ${dspare}-1
	log_must check_vdev_state $TESTPOOL spare-8 "DEGRADED"
	log_must check_vdev_state $TESTPOOL ${dspare}-1 "ONLINE"
	log_must check_hotspare_state $TESTPOOL ${dspare}-1 "INUSE"
	verify_resilver_result $TESTPOOL

	# Distributed spare in mirror with original device still online
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev9 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev9 ${dspare}-2
	log_must check_vdev_state $TESTPOOL spare-9 "ONLINE"
	log_must check_vdev_state $TESTPOOL ${dspare}-2 "ONLINE"
	log_must check_hotspare_state $TESTPOOL ${dspare}-2 "INUSE"
	verify_resilver_result $TESTPOOL

	# Normal faulted device replacement
	new_vdev0="$BASEDIR/new_vdev0"
	log_must truncate -s $MINVDEVSIZE $new_vdev0
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev0
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev0 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev0 $new_vdev0
	log_must check_vdev_state $TESTPOOL $new_vdev0 "ONLINE"
	verify_resilver_result $TESTPOOL

	# Distributed spare faulted device replacement
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev2
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev2 "FAULTED"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev2 ${dspare}-3
	log_must check_vdev_state $TESTPOOL spare-2 "DEGRADED"
	log_must check_vdev_state $TESTPOOL ${dspare}-3 "ONLINE"
	log_must check_hotspare_state $TESTPOOL ${dspare}-3 "INUSE"
	verify_resilver_result $TESTPOOL

	# Normal online device replacement
	new_vdev1="$BASEDIR/new_vdev1"
	log_must truncate -s $MINVDEVSIZE $new_vdev1
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev1 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev1 $new_vdev1
	log_must check_vdev_state $TESTPOOL $new_vdev1 "ONLINE"
	verify_resilver_result $TESTPOOL

	# Distributed spare online device replacement (then fault).
	# As in the other online-device scenarios, confirm the starting
	# state first so a failure here is attributed to the right step.
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev3 "ONLINE"
	log_must zpool replace -w $flags $TESTPOOL $BASEDIR/vdev3 ${dspare}-4
	log_must check_vdev_state $TESTPOOL spare-3 "ONLINE"
	log_must check_vdev_state $TESTPOOL ${dspare}-4 "ONLINE"
	log_must check_hotspare_state $TESTPOOL ${dspare}-4 "INUSE"
	log_must zpool offline -f $TESTPOOL $BASEDIR/vdev3
	log_must check_vdev_state $TESTPOOL $BASEDIR/vdev3 "FAULTED"
	log_must check_vdev_state $TESTPOOL spare-3 "DEGRADED"
	verify_resilver_result $TESTPOOL

	# Verify the original data is valid
	log_must is_data_valid $TESTPOOL
	log_must check_pool_status $TESTPOOL "errors" "No known data errors"

	# Tear the environment down so the next replace mode starts from a
	# freshly created pool.
	cleanup
done

log_pass "Verify resilver to dRAID distributed spares"
190